drm/i915: Avoid calling i915_gem_object_unbind holding object lock
author    Chris Wilson <chris@chris-wilson.co.uk>
          Fri, 6 Dec 2019 10:55:27 +0000 (10:55 +0000)
committer Chris Wilson <chris@chris-wilson.co.uk>
          Sat, 7 Dec 2019 19:27:36 +0000 (19:27 +0000)
In the extreme case, we may wish to wait on an rcu-barrier to reap stale
vm to purge the last of the object bindings. However, we are not allowed
to use rcu_barrier() beneath the dma_resv (i.e. object) lock, and we do
not take lightly the prospect of unlocking a mutex deep in the bowels of
the routine. i915_gem_object_unbind() itself does not need the object
lock, and it turns out the callers do not need the unbind to be part of
a locked sequence around set-cache-level, so rearrange the code to avoid
taking the object lock in the callers.
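
As a minimal sketch of the reordering (reconstructed from the
i915_gem_domain.c hunk below; the function signature is assumed from
context), the cache metadata is now updated under the object lock, and
the unbind, which may end up in rcu_barrier(), runs only after the lock
has been dropped:

	int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
					    enum i915_cache_level cache_level)
	{
		int ret;

		if (obj->cache_level == cache_level)
			return 0;

		/* Update the cache-coherency tracking under the object lock. */
		ret = i915_gem_object_lock_interruptible(obj);
		if (ret)
			return ret;

		if (obj->cache_level != cache_level) {
			i915_gem_object_set_cache_coherency(obj, cache_level);
			obj->cache_dirty = true; /* Always invalidate stale cachelines */
		}

		i915_gem_object_unlock(obj);

		/*
		 * Unbind outside the dma_resv lock so rcu_barrier() is safe;
		 * the cache-level is applied as each vma is rebound.
		 */
		return i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE);
	}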

<4> [186.816311] ======================================================
<4> [186.816313] WARNING: possible circular locking dependency detected
<4> [186.816316] 5.4.0-rc8-CI-CI_DRM_7486+ #1 Tainted: G     U
<4> [186.816318] ------------------------------------------------------
<4> [186.816320] perf_pmu/1321 is trying to acquire lock:
<4> [186.816322] ffff88849487c4d8 (&mm->mmap_sem#2){++++}, at: __might_fault+0x39/0x90
<4> [186.816331]
but task is already holding lock:
<4> [186.816333] ffffe8ffffa05008 (&cpuctx_mutex){+.+.}, at: perf_event_ctx_lock_nested+0xa9/0x1b0
<4> [186.816339]
which lock already depends on the new lock.

<4> [186.816341]
the existing dependency chain (in reverse order) is:
<4> [186.816343]
-> #6 (&cpuctx_mutex){+.+.}:
<4> [186.816349]        __mutex_lock+0x9a/0x9d0
<4> [186.816352]        perf_event_init_cpu+0xa4/0x140
<4> [186.816357]        perf_event_init+0x19d/0x1cd
<4> [186.816362]        start_kernel+0x372/0x4f4
<4> [186.816365]        secondary_startup_64+0xa4/0xb0
<4> [186.816381]
-> #5 (pmus_lock){+.+.}:
<4> [186.816385]        __mutex_lock+0x9a/0x9d0
<4> [186.816387]        perf_event_init_cpu+0x6b/0x140
<4> [186.816404]        cpuhp_invoke_callback+0x9b/0x9d0
<4> [186.816406]        _cpu_up+0xa2/0x140
<4> [186.816409]        do_cpu_up+0x61/0xa0
<4> [186.816411]        smp_init+0x57/0x96
<4> [186.816413]        kernel_init_freeable+0xac/0x1c7
<4> [186.816416]        kernel_init+0x5/0x100
<4> [186.816419]        ret_from_fork+0x24/0x50
<4> [186.816421]
-> #4 (cpu_hotplug_lock.rw_sem){++++}:
<4> [186.816424]        cpus_read_lock+0x34/0xd0
<4> [186.816427]        rcu_barrier+0xaa/0x190
<4> [186.816429]        kernel_init+0x21/0x100
<4> [186.816431]        ret_from_fork+0x24/0x50
<4> [186.816433]
-> #3 (rcu_state.barrier_mutex){+.+.}:
<4> [186.816436]        __mutex_lock+0x9a/0x9d0
<4> [186.816438]        rcu_barrier+0x23/0x190
<4> [186.816502]        i915_gem_object_unbind+0x3a6/0x400 [i915]
<4> [186.816537]        i915_gem_object_set_cache_level+0x32/0x90 [i915]
<4> [186.816571]        i915_gem_object_pin_to_display_plane+0x5d/0x160 [i915]
<4> [186.816612]        intel_pin_and_fence_fb_obj+0x9e/0x200 [i915]
<4> [186.816679]        intel_plane_pin_fb+0x3f/0xd0 [i915]
<4> [186.816717]        intel_prepare_plane_fb+0x130/0x520 [i915]
<4> [186.816722]        drm_atomic_helper_prepare_planes+0x85/0x110
<4> [186.816761]        intel_atomic_commit+0xc6/0x350 [i915]
<4> [186.816764]        drm_atomic_helper_update_plane+0xed/0x110
<4> [186.816768]        setplane_internal+0x97/0x190
<4> [186.816770]        drm_mode_setplane+0xcd/0x190
<4> [186.816773]        drm_ioctl_kernel+0xa7/0xf0
<4> [186.816775]        drm_ioctl+0x2e1/0x390
<4> [186.816778]        do_vfs_ioctl+0xa0/0x6f0
<4> [186.816780]        ksys_ioctl+0x35/0x60
<4> [186.816782]        __x64_sys_ioctl+0x11/0x20
<4> [186.816785]        do_syscall_64+0x4f/0x210
<4> [186.816787]        entry_SYSCALL_64_after_hwframe+0x49/0xbe
<4> [186.816789]
-> #2 (reservation_ww_class_mutex){+.+.}:
<4> [186.816793]        __ww_mutex_lock.constprop.15+0xc3/0x1090
<4> [186.816795]        ww_mutex_lock+0x39/0x70
<4> [186.816798]        dma_resv_lockdep+0x10e/0x1f7
<4> [186.816800]        do_one_initcall+0x58/0x2ff
<4> [186.816802]        kernel_init_freeable+0x137/0x1c7
<4> [186.816804]        kernel_init+0x5/0x100
<4> [186.816806]        ret_from_fork+0x24/0x50
<4> [186.816808]
-> #1 (reservation_ww_class_acquire){+.+.}:
<4> [186.816811]        dma_resv_lockdep+0xec/0x1f7
<4> [186.816813]        do_one_initcall+0x58/0x2ff
<4> [186.816815]        kernel_init_freeable+0x137/0x1c7
<4> [186.816817]        kernel_init+0x5/0x100
<4> [186.816819]        ret_from_fork+0x24/0x50
<4> [186.816820]
-> #0 (&mm->mmap_sem#2){++++}:
<4> [186.816824]        __lock_acquire+0x1328/0x15d0
<4> [186.816826]        lock_acquire+0xa7/0x1c0
<4> [186.816828]        __might_fault+0x63/0x90
<4> [186.816831]        _copy_to_user+0x1e/0x80
<4> [186.816834]        perf_read+0x200/0x2b0
<4> [186.816836]        vfs_read+0x96/0x160
<4> [186.816838]        ksys_read+0x9f/0xe0
<4> [186.816839]        do_syscall_64+0x4f/0x210
<4> [186.816841]        entry_SYSCALL_64_after_hwframe+0x49/0xbe
<4> [186.816843]
other info that might help us debug this:

<4> [186.816846] Chain exists of:
  &mm->mmap_sem#2 --> pmus_lock --> &cpuctx_mutex

<4> [186.816849]  Possible unsafe locking scenario:

<4> [186.816851]        CPU0                    CPU1
<4> [186.816853]        ----                    ----
<4> [186.816854]   lock(&cpuctx_mutex);
<4> [186.816856]                                lock(pmus_lock);
<4> [186.816858]                                lock(&cpuctx_mutex);
<4> [186.816860]   lock(&mm->mmap_sem#2);
<4> [186.816861]
 *** DEADLOCK ***

Closes: https://gitlab.freedesktop.org/drm/intel/issues/728
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Andi Shyti <andi.shyti@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191206105527.1130413-5-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/display/intel_display.c
drivers/gpu/drm/i915/display/intel_overlay.c
drivers/gpu/drm/i915/gem/i915_gem_domain.c

index a35ec8e027c7b8ee51396c55c6008cae2dc6db9e..37760a3814020ce490e698454e3d97a8b53627a5 100644 (file)
@@ -2165,19 +2165,18 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
         * pin/unpin/fence and not more.
         */
        wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
-       i915_gem_object_lock(obj);
 
        atomic_inc(&dev_priv->gpu_error.pending_fb_pin);
 
-       pinctl = 0;
-
-       /* Valleyview is definitely limited to scanning out the first
+       /*
+        * Valleyview is definitely limited to scanning out the first
         * 512MiB. Lets presume this behaviour was inherited from the
         * g4x display engine and that all earlier gen are similarly
         * limited. Testing suggests that it is a little more
         * complicated than this. For example, Cherryview appears quite
         * happy to scanout from anywhere within its global aperture.
         */
+       pinctl = 0;
        if (HAS_GMCH(dev_priv))
                pinctl |= PIN_MAPPABLE;
 
@@ -2189,7 +2188,8 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
        if (uses_fence && i915_vma_is_map_and_fenceable(vma)) {
                int ret;
 
-               /* Install a fence for tiled scan-out. Pre-i965 always needs a
+               /*
+                * Install a fence for tiled scan-out. Pre-i965 always needs a
                 * fence, whereas 965+ only requires a fence if using
                 * framebuffer compression.  For simplicity, we always, when
                 * possible, install a fence as the cost is not that onerous.
@@ -2219,8 +2219,6 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
        i915_vma_get(vma);
 err:
        atomic_dec(&dev_priv->gpu_error.pending_fb_pin);
-
-       i915_gem_object_unlock(obj);
        intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
        return vma;
 }
index 8cfb785e761cc15f345f805e850aa3f44e9be1f0..2a44b3be2600c7a1b94f38a7f53bcc3b5a1b0453 100644 (file)
@@ -758,10 +758,8 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
 
        atomic_inc(&dev_priv->gpu_error.pending_fb_pin);
 
-       i915_gem_object_lock(new_bo);
        vma = i915_gem_object_pin_to_display_plane(new_bo,
                                                   0, NULL, PIN_MAPPABLE);
-       i915_gem_object_unlock(new_bo);
        if (IS_ERR(vma)) {
                ret = PTR_ERR(vma);
                goto out_pin_section;
index 808eb327a29bde23b678319b267f7833b02f6a89..53e28e417cc9a6b807c8beaeb2e106af0e3e919a 100644 (file)
@@ -187,21 +187,23 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 {
        int ret;
 
-       assert_object_held(obj);
-
        if (obj->cache_level == cache_level)
                return 0;
 
-       ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE);
+       ret = i915_gem_object_lock_interruptible(obj);
        if (ret)
                return ret;
 
-       /* The cache-level will be applied when each vma is rebound. */
+       /* Always invalidate stale cachelines */
+       if (obj->cache_level != cache_level) {
+               i915_gem_object_set_cache_coherency(obj, cache_level);
+               obj->cache_dirty = true;
+       }
 
-       i915_gem_object_set_cache_coherency(obj, cache_level);
-       obj->cache_dirty = true; /* Always invalidate stale cachelines */
+       i915_gem_object_unlock(obj);
 
-       return 0;
+       /* The cache-level will be applied when each vma is rebound. */
+       return i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE);
 }
 
 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
@@ -282,20 +284,7 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
                goto out;
        }
 
-       if (obj->cache_level == level)
-               goto out;
-
-       ret = i915_gem_object_wait(obj,
-                                  I915_WAIT_INTERRUPTIBLE,
-                                  MAX_SCHEDULE_TIMEOUT);
-       if (ret)
-               goto out;
-
-       ret = i915_gem_object_lock_interruptible(obj);
-       if (ret == 0) {
-               ret = i915_gem_object_set_cache_level(obj, level);
-               i915_gem_object_unlock(obj);
-       }
+       ret = i915_gem_object_set_cache_level(obj, level);
 
 out:
        i915_gem_object_put(obj);
@@ -318,8 +307,6 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
        struct i915_vma *vma;
        int ret;
 
-       assert_object_held(obj);
-
        /* Frame buffer must be in LMEM (no migration yet) */
        if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
                return ERR_PTR(-EINVAL);
@@ -362,13 +349,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 
        vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
 
-       __i915_gem_object_flush_for_display(obj);
-
-       /*
-        * It should now be out of any other write domains, and we can update
-        * the domain values for our changes.
-        */
-       obj->read_domains |= I915_GEM_DOMAIN_GTT;
+       i915_gem_object_flush_if_display(obj);
 
        return vma;
 }