/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/kthread.h>

#include "../i915_selftest.h"

#include "mock_context.h"
#include "mock_drm.h"

struct hang {
	struct drm_i915_private *i915;
	struct drm_i915_gem_object *hws;
	struct drm_i915_gem_object *obj;
	struct i915_gem_context *ctx;
	u32 *seqno;
	u32 *batch;
};

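/* Create the kernel context and allocate/map the batch and status-page objects. */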
static int hang_init(struct hang *h, struct drm_i915_private *i915)
{
	void *vaddr;
	int err;

	memset(h, 0, sizeof(*h));
	h->i915 = i915;

	h->ctx = kernel_context(i915);
	if (IS_ERR(h->ctx))
		return PTR_ERR(h->ctx);

	h->hws = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(h->hws)) {
		err = PTR_ERR(h->hws);
		goto err_ctx;
	}

	h->obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(h->obj)) {
		err = PTR_ERR(h->obj);
		goto err_hws;
	}

	i915_gem_object_set_cache_level(h->hws, I915_CACHE_LLC);
	vaddr = i915_gem_object_pin_map(h->hws, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_obj;
	}
	h->seqno = memset(vaddr, 0xff, PAGE_SIZE);

	vaddr = i915_gem_object_pin_map(h->obj,
					HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_unpin_hws;
	}
	h->batch = vaddr;

	return 0;

err_unpin_hws:
	i915_gem_object_unpin_map(h->hws);
err_obj:
	i915_gem_object_put(h->obj);
err_hws:
	i915_gem_object_put(h->hws);
err_ctx:
	kernel_context_close(h->ctx);
	return err;
}

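/* Each context gets its own dword in the status page for reporting seqnos. */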
static u64 hws_address(const struct i915_vma *hws,
		       const struct i915_request *rq)
{
	return hws->node.start + offset_in_page(sizeof(u32)*rq->fence.context);
}

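/*
 * Emit a batch that writes the request's seqno to the status page and then
 * spins forever by branching back to its own start; the trailing
 * MI_BATCH_BUFFER_END is only reached once the loop is overwritten.
 */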
static int emit_recurse_batch(struct hang *h,
			      struct i915_request *rq)
{
	struct drm_i915_private *i915 = h->i915;
	struct i915_address_space *vm = rq->ctx->ppgtt ? &rq->ctx->ppgtt->base : &i915->ggtt.base;
	struct i915_vma *hws, *vma;
	unsigned int flags;
	u32 *batch;
	int err;

	vma = i915_vma_instance(h->obj, vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	hws = i915_vma_instance(h->hws, vm, NULL);
	if (IS_ERR(hws))
		return PTR_ERR(hws);

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		return err;

	err = i915_vma_pin(hws, 0, 0, PIN_USER);
	if (err)
		goto unpin_vma;

	i915_vma_move_to_active(vma, rq, 0);
	if (!i915_gem_object_has_active_reference(vma->obj)) {
		i915_gem_object_get(vma->obj);
		i915_gem_object_set_active_reference(vma->obj);
	}

	i915_vma_move_to_active(hws, rq, 0);
	if (!i915_gem_object_has_active_reference(hws->obj)) {
		i915_gem_object_get(hws->obj);
		i915_gem_object_set_active_reference(hws->obj);
	}

	batch = h->batch;
	if (INTEL_GEN(i915) >= 8) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = upper_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_ARB_CHECK;

		memset(batch, 0, 1024);
		batch += 1024 / sizeof(*batch);

		*batch++ = MI_ARB_CHECK;
		*batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
		*batch++ = lower_32_bits(vma->node.start);
		*batch++ = upper_32_bits(vma->node.start);
	} else if (INTEL_GEN(i915) >= 6) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4;
		*batch++ = 0;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_ARB_CHECK;

		memset(batch, 0, 1024);
		batch += 1024 / sizeof(*batch);

		*batch++ = MI_ARB_CHECK;
		*batch++ = MI_BATCH_BUFFER_START | 1 << 8;
		*batch++ = lower_32_bits(vma->node.start);
	} else if (INTEL_GEN(i915) >= 4) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22;
		*batch++ = 0;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_ARB_CHECK;

		memset(batch, 0, 1024);
		batch += 1024 / sizeof(*batch);

		*batch++ = MI_ARB_CHECK;
		*batch++ = MI_BATCH_BUFFER_START | 2 << 6;
		*batch++ = lower_32_bits(vma->node.start);
	} else {
		*batch++ = MI_STORE_DWORD_IMM;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_ARB_CHECK;

		memset(batch, 0, 1024);
		batch += 1024 / sizeof(*batch);

		*batch++ = MI_ARB_CHECK;
		*batch++ = MI_BATCH_BUFFER_START | 2 << 6 | 1;
		*batch++ = lower_32_bits(vma->node.start);
	}
	*batch++ = MI_BATCH_BUFFER_END; /* not reached */
	i915_gem_chipset_flush(h->i915);

	flags = 0;
	if (INTEL_GEN(vm->i915) <= 5)
		flags |= I915_DISPATCH_SECURE;

	err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags);

	i915_vma_unpin(hws);
unpin_vma:
	i915_vma_unpin(vma);
	return err;
}

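/*
 * Build a hanging request for the given engine, replacing the batch object
 * first if the previous one is still busy.
 */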
static struct i915_request *
hang_create_request(struct hang *h, struct intel_engine_cs *engine)
{
	struct i915_request *rq;
	int err;

	if (i915_gem_object_is_active(h->obj)) {
		struct drm_i915_gem_object *obj;
		void *vaddr;

		obj = i915_gem_object_create_internal(h->i915, PAGE_SIZE);
		if (IS_ERR(obj))
			return ERR_CAST(obj);

		vaddr = i915_gem_object_pin_map(obj,
						HAS_LLC(h->i915) ? I915_MAP_WB : I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			i915_gem_object_put(obj);
			return ERR_CAST(vaddr);
		}

		i915_gem_object_unpin_map(h->obj);
		i915_gem_object_put(h->obj);

		h->obj = obj;
		h->batch = vaddr;
	}

	rq = i915_request_alloc(engine, h->ctx);
	if (IS_ERR(rq))
		return rq;

	err = emit_recurse_batch(h, rq);
	if (err) {
		__i915_request_add(rq, false);
		return ERR_PTR(err);
	}

	return rq;
}

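/* Read back the seqno the batch reported for this request's context. */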
static u32 hws_seqno(const struct hang *h, const struct i915_request *rq)
{
	return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]);
}

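/*
 * Watchdog used around waits below: if the wait does not complete within the
 * timeout, declare the GPU wedged so the selftest fails instead of hanging.
 */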
struct wedge_me {
	struct delayed_work work;
	struct drm_i915_private *i915;
	const void *symbol;
};

static void wedge_me(struct work_struct *work)
{
	struct wedge_me *w = container_of(work, typeof(*w), work.work);

	pr_err("%pS timed out, cancelling all further testing.\n",
	       w->symbol);
	i915_gem_set_wedged(w->i915);
}

static void __init_wedge(struct wedge_me *w,
			 struct drm_i915_private *i915,
			 long timeout,
			 const void *symbol)
{
	w->i915 = i915;
	w->symbol = symbol;

	INIT_DELAYED_WORK_ONSTACK(&w->work, wedge_me);
	schedule_delayed_work(&w->work, timeout);
}

static void __fini_wedge(struct wedge_me *w)
{
	cancel_delayed_work_sync(&w->work);
	destroy_delayed_work_on_stack(&w->work);
	w->i915 = NULL;
}

#define wedge_on_timeout(W, DEV, TIMEOUT)				\
	for (__init_wedge((W), (DEV), (TIMEOUT), __builtin_return_address(0)); \
	     (W)->i915;							\
	     __fini_wedge((W)))

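/* Wait for the GPU to idle between subtests, wedging it if that takes too long. */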
static int
flush_test(struct drm_i915_private *i915, unsigned int flags)
{
	struct wedge_me w;

	wedge_on_timeout(&w, i915, HZ)
		i915_gem_wait_for_idle(i915, flags);

	return i915_terminally_wedged(&i915->gpu_error) ? -EIO : 0;
}

static void hang_fini(struct hang *h)
{
	*h->batch = MI_BATCH_BUFFER_END;
	i915_gem_chipset_flush(h->i915);

	i915_gem_object_unpin_map(h->obj);
	i915_gem_object_put(h->obj);

	i915_gem_object_unpin_map(h->hws);
	i915_gem_object_put(h->hws);

	kernel_context_close(h->ctx);

	flush_test(h->i915, I915_WAIT_LOCKED);
}

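/* Poll the status page until the hanging batch reports that it has started. */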
static bool wait_for_hang(struct hang *h, struct i915_request *rq)
{
	return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq),
					       rq->fence.seqno),
			     10) &&
		 wait_for(i915_seqno_passed(hws_seqno(h, rq),
					    rq->fence.seqno),
			  1000));
}

static int igt_hang_sanitycheck(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *rq;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct hang h;
	int err;

	/* Basic check that we can execute our hanging batch */

	mutex_lock(&i915->drm.struct_mutex);
	err = hang_init(&h, i915);
	if (err)
		goto unlock;

	for_each_engine(engine, i915, id) {
		long timeout;

		if (!intel_engine_can_store_dword(engine))
			continue;

		rq = hang_create_request(&h, engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			pr_err("Failed to create request for %s, err=%d\n",
			       engine->name, err);
			goto fini;
		}

		i915_request_get(rq);

		*h.batch = MI_BATCH_BUFFER_END;
		i915_gem_chipset_flush(i915);

		__i915_request_add(rq, true);

		timeout = i915_request_wait(rq,
					    I915_WAIT_LOCKED,
					    MAX_SCHEDULE_TIMEOUT);
		i915_request_put(rq);

		if (timeout < 0) {
			err = timeout;
			pr_err("Wait for request failed on %s, err=%d\n",
			       engine->name, err);
			goto fini;
		}
	}

fini:
	hang_fini(&h);
unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

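/*
 * Take all the reset bits so that the background reset machinery cannot run
 * while a test drives resets by hand.
 */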
static void global_reset_lock(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	pr_debug("%s: current gpu_error=%08lx\n",
		 __func__, i915->gpu_error.flags);

	while (test_and_set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags))
		wait_event(i915->gpu_error.reset_queue,
			   !test_bit(I915_RESET_BACKOFF,
				     &i915->gpu_error.flags));

	for_each_engine(engine, i915, id) {
		while (test_and_set_bit(I915_RESET_ENGINE + id,
					&i915->gpu_error.flags))
			wait_on_bit(&i915->gpu_error.flags,
				    I915_RESET_ENGINE + id,
				    TASK_UNINTERRUPTIBLE);
	}
}

static void global_reset_unlock(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, i915, id)
		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);

	clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
	wake_up_all(&i915->gpu_error.reset_queue);
}

static int igt_global_reset(void *arg)
{
	struct drm_i915_private *i915 = arg;
	unsigned int reset_count;
	int err = 0;

	/* Check that we can issue a global GPU reset */

	global_reset_lock(i915);
	set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);

	mutex_lock(&i915->drm.struct_mutex);
	reset_count = i915_reset_count(&i915->gpu_error);

	i915_reset(i915, I915_RESET_QUIET);

	if (i915_reset_count(&i915->gpu_error) == reset_count) {
		pr_err("No GPU reset recorded!\n");
		err = -EINVAL;
	}
	mutex_unlock(&i915->drm.struct_mutex);

	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
	global_reset_unlock(i915);

	if (i915_terminally_wedged(&i915->gpu_error))
		err = -EIO;

	return err;
}

static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct hang h;
	int err = 0;

	/* Check that we can issue an engine reset on an idle engine (no-op) */

	if (!intel_has_reset_engine(i915))
		return 0;

	if (active) {
		mutex_lock(&i915->drm.struct_mutex);
		err = hang_init(&h, i915);
		mutex_unlock(&i915->drm.struct_mutex);
		if (err)
			return err;
	}

	for_each_engine(engine, i915, id) {
		unsigned int reset_count, reset_engine_count;
		IGT_TIMEOUT(end_time);

		if (active && !intel_engine_can_store_dword(engine))
			continue;

		reset_count = i915_reset_count(&i915->gpu_error);
		reset_engine_count = i915_reset_engine_count(&i915->gpu_error,
							     engine);

		set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
		do {
			if (active) {
				struct i915_request *rq;

				mutex_lock(&i915->drm.struct_mutex);
				rq = hang_create_request(&h, engine);
				if (IS_ERR(rq)) {
					err = PTR_ERR(rq);
					mutex_unlock(&i915->drm.struct_mutex);
					break;
				}

				i915_request_get(rq);
				__i915_request_add(rq, true);
				mutex_unlock(&i915->drm.struct_mutex);

				if (!wait_for_hang(&h, rq)) {
					struct drm_printer p = drm_info_printer(i915->drm.dev);

					pr_err("%s: Failed to start request %x, at %x\n",
					       __func__, rq->fence.seqno, hws_seqno(&h, rq));
					intel_engine_dump(engine, &p,
							  "%s\n", engine->name);

					i915_request_put(rq);
					err = -EIO;
					break;
				}

				i915_request_put(rq);
			}

			engine->hangcheck.stalled = true;
			engine->hangcheck.seqno =
				intel_engine_get_seqno(engine);

			err = i915_reset_engine(engine, I915_RESET_QUIET);
			if (err) {
				pr_err("i915_reset_engine failed\n");
				break;
			}

			if (i915_reset_count(&i915->gpu_error) != reset_count) {
				pr_err("Full GPU reset recorded! (engine reset expected)\n");
				err = -EINVAL;
				break;
			}

			reset_engine_count += active;
			if (i915_reset_engine_count(&i915->gpu_error, engine) !=
			    reset_engine_count) {
				pr_err("%s engine reset %srecorded!\n",
				       engine->name, active ? "not " : "");
				err = -EINVAL;
				break;
			}

			engine->hangcheck.stalled = false;
		} while (time_before(jiffies, end_time));
		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);

		if (err)
			break;

		err = flush_test(i915, 0);
		if (err)
			break;
	}

	if (i915_terminally_wedged(&i915->gpu_error))
		err = -EIO;

	if (active) {
		mutex_lock(&i915->drm.struct_mutex);
		hang_fini(&h);
		mutex_unlock(&i915->drm.struct_mutex);
	}

	return err;
}

566 static int igt_reset_idle_engine(void *arg)
568 return __igt_reset_engine(arg, false);
571 static int igt_reset_active_engine(void *arg)
573 return __igt_reset_engine(arg, true);
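/*
 * kthread body: keep the given engine busy by alternating requests between
 * two contexts until asked to stop, so resets on another engine can be shown
 * not to disturb it.
 */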
static int active_engine(void *data)
{
	struct intel_engine_cs *engine = data;
	struct i915_request *rq[2] = {};
	struct i915_gem_context *ctx[2];
	struct drm_file *file;
	unsigned long count = 0;
	int err = 0;

	file = mock_file(engine->i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	mutex_lock(&engine->i915->drm.struct_mutex);
	ctx[0] = live_context(engine->i915, file);
	mutex_unlock(&engine->i915->drm.struct_mutex);
	if (IS_ERR(ctx[0])) {
		err = PTR_ERR(ctx[0]);
		goto err_file;
	}

	mutex_lock(&engine->i915->drm.struct_mutex);
	ctx[1] = live_context(engine->i915, file);
	mutex_unlock(&engine->i915->drm.struct_mutex);
	if (IS_ERR(ctx[1])) {
		err = PTR_ERR(ctx[1]);
		i915_gem_context_put(ctx[0]);
		goto err_file;
	}

	while (!kthread_should_stop()) {
		unsigned int idx = count++ & 1;
		struct i915_request *old = rq[idx];
		struct i915_request *new;

		mutex_lock(&engine->i915->drm.struct_mutex);
		new = i915_request_alloc(engine, ctx[idx]);
		if (IS_ERR(new)) {
			mutex_unlock(&engine->i915->drm.struct_mutex);
			err = PTR_ERR(new);
			break;
		}

		rq[idx] = i915_request_get(new);
		i915_request_add(new);
		mutex_unlock(&engine->i915->drm.struct_mutex);

		if (old) {
			i915_request_wait(old, 0, MAX_SCHEDULE_TIMEOUT);
			i915_request_put(old);
		}
	}

	for (count = 0; count < ARRAY_SIZE(rq); count++)
		i915_request_put(rq[count]);

err_file:
	mock_file_free(engine->i915, file);
	return err;
}

static int __igt_reset_engine_others(struct drm_i915_private *i915,
				     bool active)
{
	struct intel_engine_cs *engine, *other;
	enum intel_engine_id id, tmp;
	struct hang h;
	int err = 0;

	/* Check that issuing a reset on one engine does not interfere
	 * with any other engine.
	 */

	if (!intel_has_reset_engine(i915))
		return 0;

	if (active) {
		mutex_lock(&i915->drm.struct_mutex);
		err = hang_init(&h, i915);
		mutex_unlock(&i915->drm.struct_mutex);
		if (err)
			return err;
	}

	for_each_engine(engine, i915, id) {
		struct task_struct *threads[I915_NUM_ENGINES] = {};
		unsigned long resets[I915_NUM_ENGINES];
		unsigned long global = i915_reset_count(&i915->gpu_error);
		unsigned long count = 0;
		IGT_TIMEOUT(end_time);

		if (active && !intel_engine_can_store_dword(engine))
			continue;

		memset(threads, 0, sizeof(threads));
		for_each_engine(other, i915, tmp) {
			struct task_struct *tsk;

			resets[tmp] = i915_reset_engine_count(&i915->gpu_error,
							      other);

			if (other == engine)
				continue;

			tsk = kthread_run(active_engine, other,
					  "igt/%s", other->name);
			if (IS_ERR(tsk)) {
				err = PTR_ERR(tsk);
				goto unwind;
			}

			threads[tmp] = tsk;
			get_task_struct(tsk);
		}

		set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
		do {
			if (active) {
				struct i915_request *rq;

				mutex_lock(&i915->drm.struct_mutex);
				rq = hang_create_request(&h, engine);
				if (IS_ERR(rq)) {
					err = PTR_ERR(rq);
					mutex_unlock(&i915->drm.struct_mutex);
					break;
				}

				i915_request_get(rq);
				__i915_request_add(rq, true);
				mutex_unlock(&i915->drm.struct_mutex);

				if (!wait_for_hang(&h, rq)) {
					struct drm_printer p = drm_info_printer(i915->drm.dev);

					pr_err("%s: Failed to start request %x, at %x\n",
					       __func__, rq->fence.seqno, hws_seqno(&h, rq));
					intel_engine_dump(engine, &p,
							  "%s\n", engine->name);

					i915_request_put(rq);
					err = -EIO;
					break;
				}

				i915_request_put(rq);
			}

			engine->hangcheck.stalled = true;
			engine->hangcheck.seqno =
				intel_engine_get_seqno(engine);

			err = i915_reset_engine(engine, I915_RESET_QUIET);
			if (err) {
				pr_err("i915_reset_engine(%s:%s) failed, err=%d\n",
				       engine->name, active ? "active" : "idle", err);
				break;
			}

			engine->hangcheck.stalled = false;
			count++;
		} while (time_before(jiffies, end_time));
		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
		pr_info("i915_reset_engine(%s:%s): %lu resets\n",
			engine->name, active ? "active" : "idle", count);

		if (i915_reset_engine_count(&i915->gpu_error, engine) -
		    resets[engine->id] != (active ? count : 0)) {
			pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n",
			       engine->name, active ? "active" : "idle", count,
			       i915_reset_engine_count(&i915->gpu_error,
						       engine) - resets[engine->id]);
			err = -EINVAL;
		}

unwind:
		for_each_engine(other, i915, tmp) {
			int ret;

			if (!threads[tmp])
				continue;

			ret = kthread_stop(threads[tmp]);
			if (ret) {
				pr_err("kthread for other engine %s failed, err=%d\n",
				       other->name, ret);
				err = ret;
			}
			put_task_struct(threads[tmp]);

			if (resets[tmp] != i915_reset_engine_count(&i915->gpu_error,
								   other)) {
				pr_err("Innocent engine %s was reset (count=%ld)\n",
				       other->name,
				       i915_reset_engine_count(&i915->gpu_error,
							       other) - resets[tmp]);
				err = -EINVAL;
			}
		}

		if (global != i915_reset_count(&i915->gpu_error)) {
			pr_err("Global reset (count=%ld)!\n",
			       i915_reset_count(&i915->gpu_error) - global);
			err = -EINVAL;
		}

		if (err)
			break;

		err = flush_test(i915, 0);
		if (err)
			break;
	}

	if (i915_terminally_wedged(&i915->gpu_error))
		err = -EIO;

	if (active) {
		mutex_lock(&i915->drm.struct_mutex);
		hang_fini(&h);
		mutex_unlock(&i915->drm.struct_mutex);
	}

	return err;
}

806 static int igt_reset_idle_engine_others(void *arg)
808 return __igt_reset_engine_others(arg, false);
811 static int igt_reset_active_engine_others(void *arg)
813 return __igt_reset_engine_others(arg, true);
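/*
 * Pretend hangcheck found the engine stuck on this request and hand over to
 * the reset handler, returning the global reset count beforehand.
 */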
static u32 fake_hangcheck(struct i915_request *rq)
{
	u32 reset_count;

	rq->engine->hangcheck.stalled = true;
	rq->engine->hangcheck.seqno = intel_engine_get_seqno(rq->engine);

	reset_count = i915_reset_count(&rq->i915->gpu_error);

	set_bit(I915_RESET_HANDOFF, &rq->i915->gpu_error.flags);
	wake_up_all(&rq->i915->gpu_error.wait_queue);

	return reset_count;
}

static int igt_wait_reset(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *rq;
	unsigned int reset_count;
	struct hang h;
	long timeout;
	int err;

	if (!intel_engine_can_store_dword(i915->engine[RCS]))
		return 0;

	/* Check that we detect a stuck waiter and issue a reset */

	global_reset_lock(i915);

	mutex_lock(&i915->drm.struct_mutex);
	err = hang_init(&h, i915);
	if (err)
		goto unlock;

	rq = hang_create_request(&h, i915->engine[RCS]);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto fini;
	}

	i915_request_get(rq);
	__i915_request_add(rq, true);

	if (!wait_for_hang(&h, rq)) {
		struct drm_printer p = drm_info_printer(i915->drm.dev);

		pr_err("%s: Failed to start request %x, at %x\n",
		       __func__, rq->fence.seqno, hws_seqno(&h, rq));
		intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);

		i915_gem_set_wedged(i915);

		err = -EIO;
		goto out_rq;
	}

	reset_count = fake_hangcheck(rq);

	timeout = i915_request_wait(rq, I915_WAIT_LOCKED, 10);
	if (timeout < 0) {
		pr_err("i915_request_wait failed on a stuck request: err=%ld\n",
		       timeout);
		err = timeout;
		goto out_rq;
	}

	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
	if (i915_reset_count(&i915->gpu_error) == reset_count) {
		pr_err("No GPU reset recorded!\n");
		err = -EINVAL;
		goto out_rq;
	}

out_rq:
	i915_request_put(rq);
fini:
	hang_fini(&h);
unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	global_reset_unlock(i915);

	if (i915_terminally_wedged(&i915->gpu_error))
		return -EIO;

	return err;
}

static int igt_reset_queue(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct hang h;
	int err;

	/* Check that we replay pending requests following a hang */

	global_reset_lock(i915);

	mutex_lock(&i915->drm.struct_mutex);
	err = hang_init(&h, i915);
	if (err)
		goto unlock;

	for_each_engine(engine, i915, id) {
		struct i915_request *prev;
		IGT_TIMEOUT(end_time);
		unsigned int count;

		if (!intel_engine_can_store_dword(engine))
			continue;

		prev = hang_create_request(&h, engine);
		if (IS_ERR(prev)) {
			err = PTR_ERR(prev);
			goto fini;
		}

		i915_request_get(prev);
		__i915_request_add(prev, true);

		count = 0;
		do {
			struct i915_request *rq;
			unsigned int reset_count;

			rq = hang_create_request(&h, engine);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto fini;
			}

			i915_request_get(rq);
			__i915_request_add(rq, true);

			if (!wait_for_hang(&h, prev)) {
				struct drm_printer p = drm_info_printer(i915->drm.dev);

				pr_err("%s: Failed to start request %x, at %x\n",
				       __func__, prev->fence.seqno, hws_seqno(&h, prev));
				intel_engine_dump(prev->engine, &p,
						  "%s\n", prev->engine->name);

				i915_request_put(rq);
				i915_request_put(prev);

				i915_gem_set_wedged(i915);

				err = -EIO;
				goto fini;
			}

			reset_count = fake_hangcheck(prev);

			i915_reset(i915, I915_RESET_QUIET);

			GEM_BUG_ON(test_bit(I915_RESET_HANDOFF,
					    &i915->gpu_error.flags));

			if (prev->fence.error != -EIO) {
				pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n",
				       prev->fence.error);
				i915_request_put(rq);
				i915_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			if (rq->fence.error) {
				pr_err("Fence error status not zero [%d] after unrelated reset\n",
				       rq->fence.error);
				i915_request_put(rq);
				i915_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			if (i915_reset_count(&i915->gpu_error) == reset_count) {
				pr_err("No GPU reset recorded!\n");
				i915_request_put(rq);
				i915_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			i915_request_put(prev);
			prev = rq;
			count++;
		} while (time_before(jiffies, end_time));
		pr_info("%s: Completed %d resets\n", engine->name, count);

		*h.batch = MI_BATCH_BUFFER_END;
		i915_gem_chipset_flush(i915);

		i915_request_put(prev);

		err = flush_test(i915, I915_WAIT_LOCKED);
		if (err)
			break;
	}

fini:
	hang_fini(&h);
unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	global_reset_unlock(i915);

	if (i915_terminally_wedged(&i915->gpu_error))
		return -EIO;

	return err;
}

static int igt_handle_error(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine = i915->engine[RCS];
	struct hang h;
	struct i915_request *rq;
	struct i915_gpu_state *error;
	int err;

	/* Check that we can issue a global GPU and engine reset */

	if (!intel_has_reset_engine(i915))
		return 0;

	if (!intel_engine_can_store_dword(i915->engine[RCS]))
		return 0;

	mutex_lock(&i915->drm.struct_mutex);

	err = hang_init(&h, i915);
	if (err)
		goto err_unlock;

	rq = hang_create_request(&h, engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_fini;
	}

	i915_request_get(rq);
	__i915_request_add(rq, true);

	if (!wait_for_hang(&h, rq)) {
		struct drm_printer p = drm_info_printer(i915->drm.dev);

		pr_err("%s: Failed to start request %x, at %x\n",
		       __func__, rq->fence.seqno, hws_seqno(&h, rq));
		intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);

		i915_reset(i915, 0);
		i915_gem_set_wedged(i915);

		err = -EIO;
		goto err_request;
	}

	mutex_unlock(&i915->drm.struct_mutex);

	/* Temporarily disable error capture */
	error = xchg(&i915->gpu_error.first_error, (void *)-1);

	engine->hangcheck.stalled = true;
	engine->hangcheck.seqno = intel_engine_get_seqno(engine);

	i915_handle_error(i915, intel_engine_flag(engine), "%s", __func__);

	xchg(&i915->gpu_error.first_error, error);

	mutex_lock(&i915->drm.struct_mutex);

	if (rq->fence.error != -EIO) {
		pr_err("Guilty request not identified!\n");
		err = -EINVAL;
		goto err_request;
	}

err_request:
	i915_request_put(rq);
err_fini:
	hang_fini(&h);
err_unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_global_reset), /* attempt to recover GPU first */
		SUBTEST(igt_hang_sanitycheck),
		SUBTEST(igt_reset_idle_engine),
		SUBTEST(igt_reset_active_engine),
		SUBTEST(igt_reset_idle_engine_others),
		SUBTEST(igt_reset_active_engine_others),
		SUBTEST(igt_wait_reset),
		SUBTEST(igt_reset_queue),
		SUBTEST(igt_handle_error),
	};
	bool saved_hangcheck;
	int err;

	if (!intel_has_gpu_reset(i915))
		return 0;

	intel_runtime_pm_get(i915);
	saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck);

	err = i915_subtests(tests, i915);

	i915_modparams.enable_hangcheck = saved_hangcheck;
	intel_runtime_pm_put(i915);

	return err;
}