/*
 * SPDX-License-Identifier: GPL-2.0
 *
 * Copyright © 2019 Intel Corporation
 */

#include "i915_selftest.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"

#include "gem/selftests/mock_context.h"
#include "selftests/igt_flush_test.h"
#include "selftests/mock_drm.h"

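/*
 * Submit the request and synchronously wait for it to complete, retiring
 * it (and everything before it on the same timeline) on success. HZ / 10
 * (100ms) is a generous timeout for the no-op requests used below.
 */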
static int request_sync(struct i915_request *rq)
{
	long timeout;
	int err = 0;

	i915_request_get(rq);

	i915_request_add(rq);
	timeout = i915_request_wait(rq, 0, HZ / 10);
	if (timeout < 0) {
		err = timeout;
	} else {
		mutex_lock(&rq->timeline->mutex);
		i915_request_retire_upto(rq);
		mutex_unlock(&rq->timeline->mutex);
	}

	i915_request_put(rq);

	return err;
}

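/*
 * Wait for (and retire) every request outstanding on the context's
 * timeline, looping until the timeline's last_request slot runs empty.
 */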
static int context_sync(struct intel_context *ce)
{
	struct intel_timeline *tl = ce->timeline;
	int err = 0;

	mutex_lock(&tl->mutex);
	do {
		struct i915_request *rq;
		long timeout;

		rcu_read_lock();
		rq = rcu_dereference(tl->last_request.request);
		if (rq)
			rq = i915_request_get_rcu(rq);
		rcu_read_unlock();
		if (!rq)
			break;

		timeout = i915_request_wait(rq, 0, HZ / 10);
		if (timeout < 0)
			err = timeout;
		else
			i915_request_retire_upto(rq);

		i915_request_put(rq);
	} while (!err);
	mutex_unlock(&tl->mutex);

	return err;
}

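/*
 * Poison the tail of the context image (the caller has already grown
 * engine->context_size by one page for this) and check that a full
 * save/restore cycle leaves the poison intact, i.e. that the HW does
 * not write beyond the context size we claim to use.
 */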
static int __live_context_size(struct intel_engine_cs *engine,
			       struct i915_gem_context *fixme)
{
	struct intel_context *ce;
	struct i915_request *rq;
	void *vaddr;
	int err;

	ce = intel_context_create(fixme, engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	err = intel_context_pin(ce);
	if (err)
		goto err;

	vaddr = i915_gem_object_pin_map(ce->state->obj,
					i915_coherent_map_type(engine->i915));
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		intel_context_unpin(ce);
		goto err;
	}

	/*
	 * Note that execlists also applies a redzone which it checks on
	 * context unpin when debugging. We are using the same location
	 * and same poison value so that our checks overlap. Despite the
	 * redundancy, we want to keep this little selftest so that we
	 * get coverage of any and all submission backends, and we can
	 * always extend this test to ensure we trick the HW into a
	 * compromising position wrt the various sections that need
	 * to be written into the context state.
	 *
	 * TL;DR: this overlaps with the execlists redzone.
	 */
	if (HAS_EXECLISTS(engine->i915))
		vaddr += LRC_HEADER_PAGES * PAGE_SIZE;

	vaddr += engine->context_size - I915_GTT_PAGE_SIZE;
	memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE);

	rq = intel_context_create_request(ce);
	intel_context_unpin(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	err = request_sync(rq);
	if (err)
		goto err_unpin;

	/* Force the context switch */
	rq = i915_request_create(engine->kernel_context);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}
	err = request_sync(rq);
	if (err)
		goto err_unpin;

	if (memchr_inv(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE)) {
		pr_err("%s context overwrote trailing red-zone!\n", engine->name);
		err = -EINVAL;
	}

err_unpin:
	i915_gem_object_unpin_map(ce->state->obj);
err:
	intel_context_put(ce);
	return err;
}

static int live_context_size(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct i915_gem_context *fixme;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check that our context sizes are correct by seeing if the
	 * HW tries to write past the end of one.
	 */

	mutex_lock(&gt->i915->drm.struct_mutex);

	fixme = kernel_context(gt->i915);
	if (IS_ERR(fixme)) {
		err = PTR_ERR(fixme);
		goto unlock;
	}

	for_each_engine(engine, gt->i915, id) {
		struct {
			struct drm_i915_gem_object *state;
			void *pinned;
		} saved;

		if (!engine->context_size)
			continue;

		intel_engine_pm_get(engine);

		/*
		 * Hide the old default state -- we lie about the context size
		 * and get confused when the default state is smaller than
		 * expected. For our do-nothing request, inheriting the
		 * active state is sufficient; we are only checking that we
		 * don't use more than we planned.
		 */
		saved.state = fetch_and_zero(&engine->default_state);
		saved.pinned = fetch_and_zero(&engine->pinned_default_state);

		/* Overlaps with the execlists redzone */
		engine->context_size += I915_GTT_PAGE_SIZE;

		err = __live_context_size(engine, fixme);

		engine->context_size -= I915_GTT_PAGE_SIZE;

		engine->pinned_default_state = saved.pinned;
		engine->default_state = saved.state;

		intel_engine_pm_put(engine);

		if (err)
			break;
	}

	kernel_context_close(fixme);
unlock:
	mutex_unlock(&gt->i915->drm.struct_mutex);
	return err;
}

static int __live_active_context(struct intel_engine_cs *engine,
				 struct i915_gem_context *fixme)
{
	struct intel_context *ce;
	int pass;
	int err;

	/*
	 * We keep active contexts alive until after a subsequent context
	 * switch as the final write from the context-save will be after
	 * we retire the final request. We track when we unpin the context,
	 * under the presumption that the final pin is from the last request,
	 * and instead of immediately unpinning the context, we add a task
	 * to unpin the context from the next idle-barrier.
	 *
	 * This test makes sure that the context is kept alive until a
	 * subsequent idle-barrier (emitted when the engine wakeref hits 0
	 * with no more outstanding requests).
	 */

	if (intel_engine_pm_is_awake(engine)) {
		pr_err("%s is awake before starting %s!\n",
		       engine->name, __func__);
		return -EINVAL;
	}

	ce = intel_context_create(fixme, engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	for (pass = 0; pass <= 2; pass++) {
		struct i915_request *rq;

		rq = intel_context_create_request(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err;
		}

		err = request_sync(rq);
		if (err)
			goto err;

		/* Context will be kept active until after an idle-barrier. */
		if (i915_active_is_idle(&ce->active)) {
			pr_err("context is not active; expected idle-barrier (%s pass %d)\n",
			       engine->name, pass);
			err = -EINVAL;
			goto err;
		}

		if (!intel_engine_pm_is_awake(engine)) {
			pr_err("%s is asleep before idle-barrier\n",
			       engine->name);
			err = -EINVAL;
			goto err;
		}
	}

	/* Now make sure our idle-barriers are flushed */
	err = context_sync(engine->kernel_context);
	if (err)
		goto err;

	if (!i915_active_is_idle(&ce->active)) {
		pr_err("context is still active!\n");
		err = -EINVAL;
	}

	if (intel_engine_pm_is_awake(engine)) {
		struct drm_printer p = drm_debug_printer(__func__);

		intel_engine_dump(engine, &p,
				  "%s is still awake after idle-barriers\n",
				  engine->name);
		GEM_TRACE_DUMP();

		err = -EINVAL;
		goto err;
	}

err:
	intel_context_put(ce);
	return err;
}

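/*
 * Run __live_active_context() on every engine, flushing outstanding work
 * between engines so a failure on one does not spill into the next.
 */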
static int live_active_context(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct i915_gem_context *fixme;
	enum intel_engine_id id;
	struct drm_file *file;
	int err = 0;

	file = mock_file(gt->i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	mutex_lock(&gt->i915->drm.struct_mutex);

	fixme = live_context(gt->i915, file);
	if (IS_ERR(fixme)) {
		err = PTR_ERR(fixme);
		goto unlock;
	}

	for_each_engine(engine, gt->i915, id) {
		err = __live_active_context(engine, fixme);
		if (err)
			break;

		err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
		if (err)
			break;
	}

unlock:
	mutex_unlock(&gt->i915->drm.struct_mutex);
	mock_file_free(gt->i915, file);
	return err;
}

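/*
 * Submit a request on @ce that also operates on @remote's context image
 * via intel_context_prepare_remote_request(), then wait for it to
 * complete; i.e. one context poking at another's state while both are
 * tracked by i915_active.
 */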
static int __remote_sync(struct intel_context *ce, struct intel_context *remote)
{
	struct i915_request *rq;
	int err;

	err = intel_context_pin(remote);
	if (err)
		return err;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto unpin;
	}

	err = intel_context_prepare_remote_request(remote, rq);
	if (err) {
		i915_request_add(rq);
		goto unpin;
	}

	err = request_sync(rq);

unpin:
	intel_context_unpin(remote);
	return err;
}

static int __live_remote_context(struct intel_engine_cs *engine,
				 struct i915_gem_context *fixme)
{
	struct intel_context *local, *remote;
	int pass;
	int err;

	/*
	 * Check that our idle barriers do not interfere with normal
	 * activity tracking. In particular, check that operating
	 * on the context image remotely (intel_context_prepare_remote_request),
	 * which inserts foreign fences into intel_context.active, does not
	 * clobber the idle-barrier.
	 */

	remote = intel_context_create(fixme, engine);
	if (IS_ERR(remote))
		return PTR_ERR(remote);

	local = intel_context_create(fixme, engine);
	if (IS_ERR(local)) {
		err = PTR_ERR(local);
		goto err_remote;
	}

	for (pass = 0; pass <= 2; pass++) {
		err = __remote_sync(local, remote);
		if (err)
			break;

		err = __remote_sync(engine->kernel_context, remote);
		if (err)
			break;

		if (i915_active_is_idle(&remote->active)) {
			pr_err("remote context is not active; expected idle-barrier (%s pass %d)\n",
			       engine->name, pass);
			err = -EINVAL;
			break;
		}
	}

	intel_context_put(local);
err_remote:
	intel_context_put(remote);
	return err;
}

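/*
 * Run __live_remote_context() on every engine, again flushing between
 * engines to keep any failure isolated.
 */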
static int live_remote_context(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct i915_gem_context *fixme;
	enum intel_engine_id id;
	struct drm_file *file;
	int err = 0;

	file = mock_file(gt->i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	mutex_lock(&gt->i915->drm.struct_mutex);

	fixme = live_context(gt->i915, file);
	if (IS_ERR(fixme)) {
		err = PTR_ERR(fixme);
		goto unlock;
	}

	for_each_engine(engine, gt->i915, id) {
		err = __live_remote_context(engine, fixme);
		if (err)
			break;

		err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
		if (err)
			break;
	}

unlock:
	mutex_unlock(&gt->i915->drm.struct_mutex);
	mock_file_free(gt->i915, file);
	return err;
}

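/*
 * Entry point for the live context selftests; skipped outright if the GT
 * is already wedged, since no engine could execute our requests.
 */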
int intel_context_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_context_size),
		SUBTEST(live_active_context),
		SUBTEST(live_remote_context),
	};
	struct intel_gt *gt = &i915->gt;

	if (intel_gt_is_wedged(gt))
		return 0;

	return intel_gt_live_subtests(tests, gt);
}