/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include <linux/debugobjects.h>

#include "gt/intel_engine_pm.h"
#include "gt/intel_ring.h"

#include "i915_drv.h"
#include "i915_active.h"
#include "i915_globals.h"

/*
 * Active refs memory management
 *
 * To be more economical with memory, we reap all the i915_active trees as
 * they idle (when we know the active requests are inactive) and allocate the
 * nodes from a local slab cache to hopefully reduce the fragmentation.
 */
static struct i915_global_active {
	struct i915_global base;
	struct kmem_cache *slab_cache;
} global;
struct active_node {
	struct i915_active_fence base;
	struct i915_active *ref;
	struct rb_node node;
	u64 timeline;
};

static inline struct active_node *
node_from_active(struct i915_active_fence *active)
{
	return container_of(active, struct active_node, base);
}

#define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers)

static inline bool is_barrier(const struct i915_active_fence *active)
{
	return IS_ERR(rcu_access_pointer(active->fence));
}

static inline struct llist_node *barrier_to_ll(struct active_node *node)
{
	GEM_BUG_ON(!is_barrier(&node->base));
	return (struct llist_node *)&node->base.cb.node;
}

static inline struct intel_engine_cs *
__barrier_to_engine(struct active_node *node)
{
	return (struct intel_engine_cs *)READ_ONCE(node->base.cb.node.prev);
}

static inline struct intel_engine_cs *
barrier_to_engine(struct active_node *node)
{
	GEM_BUG_ON(!is_barrier(&node->base));
	return __barrier_to_engine(node);
}

static inline struct active_node *barrier_from_ll(struct llist_node *x)
{
	return container_of((struct list_head *)x,
			    struct active_node, base.cb.node);
}
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS)

static void *active_debug_hint(void *addr)
{
	struct i915_active *ref = addr;

	return (void *)ref->active ?: (void *)ref->retire ?: (void *)ref;
}

static struct debug_obj_descr active_debug_desc = {
	.name = "i915_active",
	.debug_hint = active_debug_hint,
};

static void debug_active_init(struct i915_active *ref)
{
	debug_object_init(ref, &active_debug_desc);
}

static void debug_active_activate(struct i915_active *ref)
{
	spin_lock_irq(&ref->tree_lock);
	if (!atomic_read(&ref->count)) /* before the first inc */
		debug_object_activate(ref, &active_debug_desc);
	spin_unlock_irq(&ref->tree_lock);
}

static void debug_active_deactivate(struct i915_active *ref)
{
	lockdep_assert_held(&ref->tree_lock);
	if (!atomic_read(&ref->count)) /* after the last dec */
		debug_object_deactivate(ref, &active_debug_desc);
}

static void debug_active_fini(struct i915_active *ref)
{
	debug_object_free(ref, &active_debug_desc);
}

static void debug_active_assert(struct i915_active *ref)
{
	debug_object_assert_init(ref, &active_debug_desc);
}

#else

static inline void debug_active_init(struct i915_active *ref) { }
static inline void debug_active_activate(struct i915_active *ref) { }
static inline void debug_active_deactivate(struct i915_active *ref) { }
static inline void debug_active_fini(struct i915_active *ref) { }
static inline void debug_active_assert(struct i915_active *ref) { }

#endif
static void
__active_retire(struct i915_active *ref)
{
	struct active_node *it, *n;
	struct rb_root root;
	unsigned long flags;

	GEM_BUG_ON(i915_active_is_idle(ref));

	/* return the unused nodes to our slabcache -- flushing the allocator */
	if (!atomic_dec_and_lock_irqsave(&ref->count, &ref->tree_lock, flags))
		return;

	GEM_BUG_ON(rcu_access_pointer(ref->excl.fence));
	debug_active_deactivate(ref);

	root = ref->tree;
	ref->tree = RB_ROOT;
	ref->cache = NULL;

	spin_unlock_irqrestore(&ref->tree_lock, flags);

	/* After the final retire, the entire struct may be freed */
	if (ref->retire)
		ref->retire(ref);

	/* ... except if you wait on it, you must manage your own references! */
	wake_up_var(ref);

	rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
		GEM_BUG_ON(i915_active_fence_isset(&it->base));
		kmem_cache_free(global.slab_cache, it);
	}
}

static void
active_work(struct work_struct *wrk)
{
	struct i915_active *ref = container_of(wrk, typeof(*ref), work);

	GEM_BUG_ON(!atomic_read(&ref->count));
	if (atomic_add_unless(&ref->count, -1, 1))
		return;

	__active_retire(ref);
}
static void
active_retire(struct i915_active *ref)
{
	GEM_BUG_ON(!atomic_read(&ref->count));
	if (atomic_add_unless(&ref->count, -1, 1))
		return;

	if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS) {
		queue_work(system_unbound_wq, &ref->work);
		return;
	}

	__active_retire(ref);
}

static void
node_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	i915_active_fence_cb(fence, cb);
	active_retire(container_of(cb, struct active_node, base.cb)->ref);
}

static void
excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	i915_active_fence_cb(fence, cb);
	active_retire(container_of(cb, struct i915_active, excl.cb));
}
static struct i915_active_fence *
active_instance(struct i915_active *ref, struct intel_timeline *tl)
{
	struct active_node *node, *prealloc;
	struct rb_node **p, *parent;
	u64 idx = tl->fence_context;

	/*
	 * We track the most recently used timeline to skip a rbtree search
	 * for the common case, under typical loads we never need the rbtree
	 * at all. We can reuse the last slot if it is empty, that is
	 * after the previous activity has been retired, or if it matches the
	 * current timeline.
	 */
	node = READ_ONCE(ref->cache);
	if (node && node->timeline == idx)
		return &node->base;

	/* Preallocate a replacement, just in case */
	prealloc = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
	if (!prealloc)
		return NULL;

	spin_lock_irq(&ref->tree_lock);
	GEM_BUG_ON(i915_active_is_idle(ref));

	parent = NULL;
	p = &ref->tree.rb_node;
	while (*p) {
		parent = *p;

		node = rb_entry(parent, struct active_node, node);
		if (node->timeline == idx) {
			kmem_cache_free(global.slab_cache, prealloc);
			goto out;
		}

		if (node->timeline < idx)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}

	node = prealloc;
	__i915_active_fence_init(&node->base, &tl->mutex, NULL, node_retire);
	node->ref = ref;
	node->timeline = idx;

	rb_link_node(&node->node, parent, p);
	rb_insert_color(&node->node, &ref->tree);

out:
	ref->cache = node;
	spin_unlock_irq(&ref->tree_lock);

	BUILD_BUG_ON(offsetof(typeof(*node), base));
	return &node->base;
}
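
/**
 * __i915_active_init - initialise an i915_active tracker
 * @ref: the tracker to initialise
 * @active: optional callback invoked on first acquire (may be NULL)
 * @retire: optional callback invoked on final release (may be NULL)
 * @key: lockdep class key for the embedded mutex
 *
 * Prepares @ref for use: the reference count starts at zero and the rbtree
 * of per-timeline nodes is empty. The low bits of @retire may carry the
 * I915_ACTIVE_MAY_SLEEP flag, in which case retirement is deferred to a
 * worker on system_unbound_wq. Normally invoked via the i915_active_init()
 * wrapper, which supplies @key.
 */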
void __i915_active_init(struct i915_active *ref,
			int (*active)(struct i915_active *ref),
			void (*retire)(struct i915_active *ref),
			struct lock_class_key *key)
{
	unsigned long bits;

	debug_active_init(ref);

	ref->flags = 0;
	ref->active = active;
	ref->retire = ptr_unpack_bits(retire, &bits, 2);
	if (bits & I915_ACTIVE_MAY_SLEEP)
		ref->flags |= I915_ACTIVE_RETIRE_SLEEPS;

	spin_lock_init(&ref->tree_lock);
	ref->tree = RB_ROOT;
	ref->cache = NULL;

	init_llist_head(&ref->preallocated_barriers);
	atomic_set(&ref->count, 0);
	__mutex_init(&ref->mutex, "i915_active", key);
	__i915_active_fence_init(&ref->excl, &ref->mutex, NULL, excl_retire);
	INIT_WORK(&ref->work, active_work);
}
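
/*
 * Typical usage, sketched for illustration only (the owning structure and
 * retire callback below are hypothetical, not part of this file):
 *
 *	struct foo {
 *		struct i915_active active;
 *	};
 *
 *	static void foo_retire(struct i915_active *ref)
 *	{
 *		struct foo *foo = container_of(ref, typeof(*foo), active);
 *		// all tracked fences have signaled; release foo's resources
 *	}
 *
 *	i915_active_init(&foo->active, NULL, foo_retire);
 *	...
 *	err = i915_active_ref(&foo->active, tl, &rq->fence); // under tl->mutex
 *	...
 *	i915_active_wait(&foo->active); // optionally block until idle
 *	i915_active_fini(&foo->active);
 */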
static bool ____active_del_barrier(struct i915_active *ref,
				   struct active_node *node,
				   struct intel_engine_cs *engine)
{
	struct llist_node *head = NULL, *tail = NULL;
	struct llist_node *pos, *next;

	GEM_BUG_ON(node->timeline != engine->kernel_context->timeline->fence_context);

	/*
	 * Rebuild the llist excluding our node. We may perform this
	 * outside of the kernel_context timeline mutex and so someone
	 * else may be manipulating the engine->barrier_tasks, in
	 * which case either we or they will be upset :)
	 *
	 * A second __active_del_barrier() will report failure to claim
	 * the active_node and the caller will just shrug and know not to
	 * claim ownership of its node.
	 *
	 * A concurrent i915_request_add_active_barriers() will miss adding
	 * any of the tasks, but we will try again on the next -- and since
	 * we are actively using the barrier, we know that there will be
	 * at least another opportunity when we idle.
	 */
	llist_for_each_safe(pos, next, llist_del_all(&engine->barrier_tasks)) {
		if (node == barrier_from_ll(pos)) {
			node = NULL;
			continue;
		}

		pos->next = head;
		head = pos;
		if (!tail)
			tail = pos;
	}
	if (head)
		llist_add_batch(head, tail, &engine->barrier_tasks);

	return !node;
}

static bool
__active_del_barrier(struct i915_active *ref, struct active_node *node)
{
	return ____active_del_barrier(ref, node, barrier_to_engine(node));
}
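
/**
 * i915_active_ref - track a fence as active within a timeline
 * @ref: the active tracker
 * @tl: the intel_timeline along which @fence will execute
 * @fence: the fence to track
 *
 * Records @fence as the most recent activity on @tl within @ref, replacing
 * the previously tracked fence for that timeline. If the slot was occupied
 * by an idle-barrier proto-node, the barrier is cancelled and its slot
 * reused. The caller must hold @tl->mutex.
 *
 * Returns 0 on success, or a negative error code if the tracker could not
 * be acquired or the per-timeline node could not be allocated.
 */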
int i915_active_ref(struct i915_active *ref,
		    struct intel_timeline *tl,
		    struct dma_fence *fence)
{
	struct i915_active_fence *active;
	int err;

	lockdep_assert_held(&tl->mutex);

	/* Prevent reaping in case we malloc/wait while building the tree */
	err = i915_active_acquire(ref);
	if (err)
		return err;

	active = active_instance(ref, tl);
	if (!active) {
		err = -ENOMEM;
		goto out;
	}

	if (is_barrier(active)) { /* proto-node used by our idle barrier */
		/*
		 * This request is on the kernel_context timeline, and so
		 * we can use it to substitute for the pending idle-barrier
		 * request that we want to emit on the kernel_context.
		 */
		__active_del_barrier(ref, node_from_active(active));
		RCU_INIT_POINTER(active->fence, NULL);
		atomic_dec(&ref->count);
	}

	if (!__i915_active_fence_set(active, fence))
		atomic_inc(&ref->count);

out:
	i915_active_release(ref);
	return err;
}
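
/**
 * i915_active_set_exclusive - track a fence in the reserved exclusive slot
 * @ref: the active tracker
 * @f: the fence to track
 *
 * Records @f in the single exclusive slot of @ref, taking an extra count on
 * @ref if the slot was previously empty. The caller is responsible for
 * ordering the fences along the exclusive timeline.
 */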
void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
{
	/* We expect the caller to manage the exclusive timeline ordering */
	GEM_BUG_ON(i915_active_is_idle(ref));

	/*
	 * As we don't know which mutex the caller is using, we told a small
	 * lie to the debug code that it is using the i915_active.mutex;
	 * and now we must stick to that lie.
	 */
	mutex_acquire(&ref->mutex.dep_map, 0, 0, _THIS_IP_);
	if (!__i915_active_fence_set(&ref->excl, f))
		atomic_inc(&ref->count);
	mutex_release(&ref->mutex.dep_map, 0, _THIS_IP_);
}

bool i915_active_acquire_if_busy(struct i915_active *ref)
{
	debug_active_assert(ref);
	return atomic_add_unless(&ref->count, 1, 0);
}
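
/**
 * i915_active_acquire - keep an i915_active alive while it is being used
 * @ref: the active tracker
 *
 * Takes a reference on @ref, preventing retirement until the matching
 * i915_active_release(). On the first acquire (the zero -> one transition)
 * the optional @active callback supplied at init time is invoked under
 * ref->mutex.
 *
 * Returns 0 on success, or the error propagated from the @active callback
 * or from an interrupted wait on ref->mutex.
 */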
int i915_active_acquire(struct i915_active *ref)
{
	int err;

	if (i915_active_acquire_if_busy(ref))
		return 0;

	err = mutex_lock_interruptible(&ref->mutex);
	if (err)
		return err;

	if (!atomic_read(&ref->count) && ref->active)
		err = ref->active(ref);
	if (!err) {
		debug_active_activate(ref);
		atomic_inc(&ref->count);
	}

	mutex_unlock(&ref->mutex);

	return err;
}

void i915_active_release(struct i915_active *ref)
{
	debug_active_assert(ref);
	active_retire(ref);
}

static void enable_signaling(struct i915_active_fence *active)
{
	struct dma_fence *fence;

	fence = i915_active_fence_get(active);
	if (!fence)
		return;

	dma_fence_enable_sw_signaling(fence);
	dma_fence_put(fence);
}
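
/**
 * i915_active_wait - block until the i915_active tracker is idle
 * @ref: the active tracker
 *
 * Flushes the lazy signaling on every currently tracked fence and then
 * sleeps, interruptibly, until the last fence is signaled and @ref retires.
 * Fences added after the wait has begun are not flushed and so are not
 * waited upon.
 *
 * Returns 0 once idle, or -EINTR if the wait was interrupted by a signal.
 */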
int i915_active_wait(struct i915_active *ref)
{
	struct active_node *it, *n;
	int err = 0;

	might_sleep();

	if (!i915_active_acquire_if_busy(ref))
		return 0;

	/* Flush lazy signals */
	enable_signaling(&ref->excl);
	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
		if (is_barrier(&it->base)) /* unconnected idle barrier */
			continue;

		enable_signaling(&it->base);
	}
	/* Any fence added after the wait begins will not be auto-signaled */

	i915_active_release(ref);
	if (err)
		return err;

	if (wait_var_event_interruptible(ref, i915_active_is_idle(ref)))
		return -EINTR;

	return 0;
}
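
/**
 * i915_request_await_active - set up a request dependency on an i915_active
 * @rq: the waiting request
 * @ref: the active tracker providing the dependency
 *
 * Makes @rq wait upon the fence currently held in the exclusive slot of
 * @ref, if any. Fences in the per-timeline rbtree are not (yet) awaited.
 *
 * Returns 0 on success or the error from i915_request_await_dma_fence().
 */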
int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
{
	int err = 0;

	if (rcu_access_pointer(ref->excl.fence)) {
		struct dma_fence *fence;

		rcu_read_lock();
		fence = dma_fence_get_rcu_safe(&ref->excl.fence);
		rcu_read_unlock();
		if (fence) {
			err = i915_request_await_dma_fence(rq, fence);
			dma_fence_put(fence);
		}
	}

	/* In the future we may choose to await on all fences */

	return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
void i915_active_fini(struct i915_active *ref)
{
	debug_active_fini(ref);
	GEM_BUG_ON(atomic_read(&ref->count));
	GEM_BUG_ON(work_pending(&ref->work));
	GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
	mutex_destroy(&ref->mutex);
}
#endif

static inline bool is_idle_barrier(struct active_node *node, u64 idx)
{
	return node->timeline == idx && !i915_active_fence_isset(&node->base);
}
static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
{
	struct rb_node *prev, *p;

	if (RB_EMPTY_ROOT(&ref->tree))
		return NULL;

	spin_lock_irq(&ref->tree_lock);
	GEM_BUG_ON(i915_active_is_idle(ref));

	/*
	 * Try to reuse any existing barrier nodes already allocated for this
	 * i915_active, due to overlapping active phases there is likely a
	 * node kept alive (as we reuse before parking). We prefer to reuse
	 * completely idle barriers (less hassle in manipulating the llists),
	 * but otherwise any will do.
	 */
	if (ref->cache && is_idle_barrier(ref->cache, idx)) {
		p = &ref->cache->node;
		goto match;
	}

	prev = NULL;
	p = ref->tree.rb_node;
	while (p) {
		struct active_node *node =
			rb_entry(p, struct active_node, node);

		if (is_idle_barrier(node, idx))
			goto match;

		prev = p;
		if (node->timeline < idx)
			p = p->rb_right;
		else
			p = p->rb_left;
	}

	/*
	 * No quick match, but we did find the leftmost rb_node for the
	 * kernel_context. Walk the rb_tree in-order to see if there were
	 * any idle-barriers on this timeline that we missed, or just use
	 * the first pending barrier.
	 */
	for (p = prev; p; p = rb_next(p)) {
		struct active_node *node =
			rb_entry(p, struct active_node, node);
		struct intel_engine_cs *engine;

		if (node->timeline > idx)
			break;

		if (node->timeline < idx)
			continue;

		if (is_idle_barrier(node, idx))
			goto match;

		/*
		 * The list of pending barriers is protected by the
		 * kernel_context timeline, which notably we do not hold
		 * here. i915_request_add_active_barriers() may consume
		 * the barrier before we claim it, so we have to check
		 * that we own this request.
		 */
		engine = __barrier_to_engine(node);
		smp_rmb(); /* serialise with add_active_barriers */
		if (is_barrier(&node->base) &&
		    ____active_del_barrier(ref, node, engine))
			goto match;
	}

	spin_unlock_irq(&ref->tree_lock);

	return NULL;

match:
	rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */
	if (p == &ref->cache->node)
		ref->cache = NULL;
	spin_unlock_irq(&ref->tree_lock);

	return rb_entry(p, struct active_node, node);
}
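
/**
 * i915_active_acquire_preallocate_barrier - reserve idle barriers for an engine
 * @ref: the active tracker
 * @engine: the engine (possibly virtual) that will emit the idle barriers
 *
 * Allocates, or reclaims from @ref's rbtree, one proto-node per physical
 * engine backing @engine and stashes them on ref->preallocated_barriers,
 * holding an engine-pm wakeref for each. The nodes are later inserted into
 * the tree by i915_active_acquire_barrier().
 *
 * Returns 0 on success, or -ENOMEM with all preallocations unwound.
 */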
int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
					    struct intel_engine_cs *engine)
{
	intel_engine_mask_t tmp, mask = engine->mask;
	struct intel_gt *gt = engine->gt;
	struct llist_node *pos, *next;
	int err;

	GEM_BUG_ON(i915_active_is_idle(ref));
	GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers));

	/*
	 * Preallocate a node for each physical engine supporting the target
	 * engine (remember virtual engines have more than one sibling).
	 * We can then use the preallocated nodes in
	 * i915_active_acquire_barrier()
	 */
	for_each_engine_masked(engine, gt, mask, tmp) {
		u64 idx = engine->kernel_context->timeline->fence_context;
		struct active_node *node;

		node = reuse_idle_barrier(ref, idx);
		if (!node) {
			node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
			if (!node) {
				err = -ENOMEM;
				goto unwind;
			}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
			node->base.lock =
				&engine->kernel_context->timeline->mutex;
#endif
			RCU_INIT_POINTER(node->base.fence, NULL);
			node->base.cb.func = node_retire;
			node->timeline = idx;
			node->ref = ref;
		}

		if (!i915_active_fence_isset(&node->base)) {
			/*
			 * Mark this as being *our* unconnected proto-node.
			 *
			 * Since this node is not in any list, and we have
			 * decoupled it from the rbtree, we can reuse the
			 * request to indicate this is an idle-barrier node
			 * and then we can use the rb_node and list pointers
			 * for our tracking of the pending barrier.
			 */
			RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN));
			node->base.cb.node.prev = (void *)engine;
			atomic_inc(&ref->count);
		}

		GEM_BUG_ON(barrier_to_engine(node) != engine);
		llist_add(barrier_to_ll(node), &ref->preallocated_barriers);
		intel_engine_pm_get(engine);
	}

	return 0;

unwind:
	llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
		struct active_node *node = barrier_from_ll(pos);

		atomic_dec(&ref->count);
		intel_engine_pm_put(barrier_to_engine(node));

		kmem_cache_free(global.slab_cache, node);
	}
	return err;
}
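
/**
 * i915_active_acquire_barrier - publish the preallocated idle barriers
 * @ref: the active tracker
 *
 * Moves every node stashed by i915_active_acquire_preallocate_barrier() from
 * ref->preallocated_barriers into the rbtree as a proto-node and onto its
 * engine's barrier_tasks list, dropping the engine-pm wakeref taken during
 * preallocation. The proto-nodes are later bound to a real fence by
 * i915_request_add_active_barriers().
 */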
void i915_active_acquire_barrier(struct i915_active *ref)
{
	struct llist_node *pos, *next;
	unsigned long flags;

	GEM_BUG_ON(i915_active_is_idle(ref));

	/*
	 * Transfer the list of preallocated barriers into the
	 * i915_active rbtree, but only as proto-nodes. They will be
	 * populated by i915_request_add_active_barriers() to point to the
	 * request that will eventually release them.
	 */
	spin_lock_irqsave_nested(&ref->tree_lock, flags, SINGLE_DEPTH_NESTING);
	llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
		struct active_node *node = barrier_from_ll(pos);
		struct intel_engine_cs *engine = barrier_to_engine(node);
		struct rb_node **p, *parent;

		parent = NULL;
		p = &ref->tree.rb_node;
		while (*p) {
			struct active_node *it;

			parent = *p;

			it = rb_entry(parent, struct active_node, node);
			if (it->timeline < node->timeline)
				p = &parent->rb_right;
			else
				p = &parent->rb_left;
		}
		rb_link_node(&node->node, parent, p);
		rb_insert_color(&node->node, &ref->tree);

		GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
		llist_add(barrier_to_ll(node), &engine->barrier_tasks);
		intel_engine_pm_put(engine);
	}
	spin_unlock_irqrestore(&ref->tree_lock, flags);
}
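
/**
 * i915_request_add_active_barriers - bind pending barriers to a request
 * @rq: a request on the engine's kernel_context timeline
 *
 * Takes every proto-node queued on rq->engine->barrier_tasks and attaches it
 * to @rq as a fence callback, so that each parent i915_active drops its
 * barrier reference when @rq is retired. @rq must be on the engine's
 * kernel_context timeline; virtual engines are not allowed.
 */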
void i915_request_add_active_barriers(struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	struct llist_node *node, *next;
	unsigned long flags;

	GEM_BUG_ON(intel_engine_is_virtual(engine));
	GEM_BUG_ON(i915_request_timeline(rq) != engine->kernel_context->timeline);

	node = llist_del_all(&engine->barrier_tasks);
	if (!node)
		return;

	/*
	 * Attach the list of proto-fences to the in-flight request such
	 * that the parent i915_active will be released when this request
	 * is retired.
	 */
	spin_lock_irqsave(&rq->lock, flags);
	llist_for_each_safe(node, next, node) {
		RCU_INIT_POINTER(barrier_from_ll(node)->base.fence, &rq->fence);
		smp_wmb(); /* serialise with reuse_idle_barrier */
		list_add_tail((struct list_head *)node, &rq->fence.cb_list);
	}
	spin_unlock_irqrestore(&rq->lock, flags);
}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
#define active_is_held(active) lockdep_is_held((active)->lock)
#else
#define active_is_held(active) true
#endif
/**
 * __i915_active_fence_set: Update the last active fence along its timeline
 * @active: the active tracker
 * @fence: the new fence (under construction)
 *
 * Records the new @fence as the last active fence along its timeline in
 * this active tracker, moving the tracking callbacks from the previous
 * fence onto this one. Returns the previous fence (if not already completed),
 * which the caller must ensure is executed before the new fence. To ensure
 * that the order of fences within the timeline of the i915_active_fence is
 * maintained, it must be locked by the caller.
 */
struct dma_fence *
__i915_active_fence_set(struct i915_active_fence *active,
			struct dma_fence *fence)
{
	struct dma_fence *prev;
	unsigned long flags;

	/* NB: must be serialised by an outer timeline mutex (active->lock) */
	spin_lock_irqsave(fence->lock, flags);
	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags));

	prev = rcu_dereference_protected(active->fence, active_is_held(active));
	if (prev) {
		GEM_BUG_ON(prev == fence);
		spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
		__list_del_entry(&active->cb.node);
		spin_unlock(prev->lock); /* serialise with prev->cb_list */

		/*
		 * active->fence is reset by the callback from inside
		 * interrupt context. We need to serialise our list
		 * manipulation with the fence->lock to prevent the prev
		 * being lost inside an interrupt (it can't be replaced as
		 * no other caller is allowed to enter __i915_active_fence_set
		 * as we hold the timeline lock). After serialising with
		 * the callback, we need to double check which ran first,
		 * our list_del() [decoupling prev from the callback] or
		 * the callback...
		 */
		prev = rcu_access_pointer(active->fence);
	}

	rcu_assign_pointer(active->fence, fence);
	list_add_tail(&active->cb.node, &fence->cb_list);

	spin_unlock_irqrestore(fence->lock, flags);

	return prev;
}
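
/**
 * i915_active_fence_set - update the last active fence with a new request
 * @active: the active tracker
 * @rq: the request whose fence replaces the previously tracked one
 *
 * Wraps __i915_active_fence_set(), additionally making @rq await the fence
 * it displaced (which may belong to another timeline) so that execution
 * order along this i915_active_fence is preserved.
 *
 * Returns 0 on success or the error from i915_request_await_dma_fence().
 */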
int i915_active_fence_set(struct i915_active_fence *active,
			  struct i915_request *rq)
{
	struct dma_fence *fence;
	int err = 0;

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
	lockdep_assert_held(active->lock);
#endif

	/* Must maintain timeline ordering wrt previous active requests */
	rcu_read_lock();
	fence = __i915_active_fence_set(active, &rq->fence);
	if (fence) /* but the previous fence may not belong to that timeline! */
		fence = dma_fence_get_rcu(fence);
	rcu_read_unlock();
	if (fence) {
		err = i915_request_await_dma_fence(rq, fence);
		dma_fence_put(fence);
	}

	return err;
}

void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	i915_active_fence_cb(fence, cb);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_active.c"
#endif
static void i915_global_active_shrink(void)
{
	kmem_cache_shrink(global.slab_cache);
}

static void i915_global_active_exit(void)
{
	kmem_cache_destroy(global.slab_cache);
}

static struct i915_global_active global = { {
	.shrink = i915_global_active_shrink,
	.exit = i915_global_active_exit,
} };

int __init i915_global_active_init(void)
{
	global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN);
	if (!global.slab_cache)
		return -ENOMEM;

	i915_global_register(&global.base);
	return 0;
}