diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4a31b7a891ecaf3e2732c9f2d6ce62fa633419f6..24b5b046754b37e8b8e3ab2c04a9f890b41eecf9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -244,14 +244,16 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 
 static void
 __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
-                               struct sg_table *pages)
+                               struct sg_table *pages,
+                               bool needs_clflush)
 {
        GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
 
        if (obj->mm.madv == I915_MADV_DONTNEED)
                obj->mm.dirty = false;
 
-       if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
+       if (needs_clflush &&
+           (obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
            !cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
                drm_clflush_sg(pages);
 
@@ -263,7 +265,7 @@ static void
 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
                               struct sg_table *pages)
 {
-       __i915_gem_object_release_shmem(obj, pages);
+       __i915_gem_object_release_shmem(obj, pages, false);
 
        if (obj->mm.dirty) {
                struct address_space *mapping = obj->base.filp->f_mapping;
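
Note on the new needs_clflush parameter: it lets a release path that already flushes by other means skip the clflush. The phys path above passes false because i915_gem_phys_pwrite() clflushes the vaddr explicitly on every write; the shmem-backed path further down passes true. A minimal sketch of the contract (the wrapper name is illustrative only; the callee and flag are real):

	/* Illustrative wrapper only: how the two real call sites use the flag. */
	static void put_backing_pages(struct drm_i915_gem_object *obj,
				      struct sg_table *pages, bool shmem_backed)
	{
		/* shmem pages may hold stale CPU cachelines -> flush them;
		 * phys pages were already clflushed at pwrite time -> skip.
		 */
		__i915_gem_object_release_shmem(obj, pages, shmem_backed);
	}
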
@@ -593,47 +595,21 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
                     struct drm_i915_gem_pwrite *args,
                     struct drm_file *file)
 {
-       struct drm_device *dev = obj->base.dev;
        void *vaddr = obj->phys_handle->vaddr + args->offset;
        char __user *user_data = u64_to_user_ptr(args->data_ptr);
-       int ret;
 
        /* We manually control the domain here and pretend that it
         * remains coherent i.e. in the GTT domain, like shmem_pwrite.
         */
-       lockdep_assert_held(&obj->base.dev->struct_mutex);
-       ret = i915_gem_object_wait(obj,
-                                  I915_WAIT_INTERRUPTIBLE |
-                                  I915_WAIT_LOCKED |
-                                  I915_WAIT_ALL,
-                                  MAX_SCHEDULE_TIMEOUT,
-                                  to_rps_client(file));
-       if (ret)
-               return ret;
-
        intel_fb_obj_invalidate(obj, ORIGIN_CPU);
-       if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
-               unsigned long unwritten;
-
-               /* The physical object once assigned is fixed for the lifetime
-                * of the obj, so we can safely drop the lock and continue
-                * to access vaddr.
-                */
-               mutex_unlock(&dev->struct_mutex);
-               unwritten = copy_from_user(vaddr, user_data, args->size);
-               mutex_lock(&dev->struct_mutex);
-               if (unwritten) {
-                       ret = -EFAULT;
-                       goto out;
-               }
-       }
+       if (copy_from_user(vaddr, user_data, args->size))
+               return -EFAULT;
 
        drm_clflush_virt_range(vaddr, args->size);
-       i915_gem_chipset_flush(to_i915(dev));
+       i915_gem_chipset_flush(to_i915(obj->base.dev));
 
-out:
        intel_fb_obj_flush(obj, false, ORIGIN_CPU);
-       return ret;
+       return 0;
 }
 
 void *i915_gem_object_alloc(struct drm_device *dev)
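
Note: the deleted block dropped struct_mutex around a blocking copy_from_user() after the inatomic fast path failed. pwrite no longer runs under struct_mutex at this point, so a plain copy_from_user() is safe and the fast/slow dance goes away. A hedged reminder of the return convention relied on (generic kernel API, not i915-specific):

	/* copy_from_user() returns the number of bytes it failed to copy,
	 * so any non-zero result maps to -EFAULT. Sketch only. */
	static int copy_in(void *dst, const void __user *src, size_t len)
	{
		return copy_from_user(dst, src, len) ? -EFAULT : 0;
	}
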
@@ -2034,8 +2010,16 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
        for (i = 0; i < dev_priv->num_fence_regs; i++) {
                struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
 
-               if (WARN_ON(reg->pin_count))
-                       continue;
+               /* Ideally we want to assert that the fence register is not
+                * live at this point (i.e. that no piece of code will be
+                * trying to write through fence + GTT, as that both violates
+                * our tracking of activity and associated locking/barriers,
+                * and is illegal given that the hw is powered down).
+                *
+                * Previously we used reg->pin_count as a "liveness" indicator.
+                * That is not sufficient, and we need a more fine-grained
+                * tool if we want to have a sanity check here.
+                */
 
                if (!reg->vma)
                        continue;
@@ -2231,7 +2215,7 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
        struct sgt_iter sgt_iter;
        struct page *page;
 
-       __i915_gem_object_release_shmem(obj, pages);
+       __i915_gem_object_release_shmem(obj, pages, true);
 
        i915_gem_gtt_finish_pages(obj, pages);
 
@@ -2304,15 +2288,6 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
        mutex_unlock(&obj->mm.lock);
 }
 
-static unsigned int swiotlb_max_size(void)
-{
-#if IS_ENABLED(CONFIG_SWIOTLB)
-       return rounddown(swiotlb_nr_tbl() << IO_TLB_SHIFT, PAGE_SIZE);
-#else
-       return 0;
-#endif
-}
-
 static void i915_sg_trim(struct sg_table *orig_st)
 {
        struct sg_table new_st;
@@ -2322,7 +2297,7 @@ static void i915_sg_trim(struct sg_table *orig_st)
        if (orig_st->nents == orig_st->orig_nents)
                return;
 
-       if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL))
+       if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN))
                return;
 
        new_sg = new_st.sgl;
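
Note: i915_sg_trim() compacts the table opportunistically; if the smaller allocation fails it simply keeps the original table, so the new __GFP_NOWARN suppresses a harmless allocation-failure warning. Sketch of the idiom, assuming nents comes from the caller:

	/* Opportunistic allocation: failure is tolerated, so do not warn. */
	struct sg_table new_st;

	if (sg_alloc_table(&new_st, nents, GFP_KERNEL | __GFP_NOWARN))
		return;	/* keep the original, oversized table */
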
@@ -2360,7 +2335,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
        GEM_BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
        GEM_BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
 
-       max_segment = swiotlb_max_size();
+       max_segment = swiotlb_max_segment();
        if (!max_segment)
                max_segment = rounddown(UINT_MAX, PAGE_SIZE);
 
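
Note: swiotlb_max_segment() is the core-kernel helper that replaces the driver-local swiotlb_max_size() copy deleted above. Like the old helper it returns 0 when SWIOTLB is not in use, so the fallback clamp is unchanged; condensed from the hunk above:

	unsigned int max_segment;

	max_segment = swiotlb_max_segment();	/* 0 when swiotlb is inactive */
	if (!max_segment)
		max_segment = rounddown(UINT_MAX, PAGE_SIZE);
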
@@ -2728,6 +2703,7 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine)
        struct drm_i915_gem_request *request;
        struct i915_gem_context *incomplete_ctx;
        struct intel_timeline *timeline;
+       unsigned long flags;
        bool ring_hung;
 
        if (engine->irq_seqno_barrier)
@@ -2763,13 +2739,20 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine)
        if (i915_gem_context_is_default(incomplete_ctx))
                return;
 
+       timeline = i915_gem_context_lookup_timeline(incomplete_ctx, engine);
+
+       spin_lock_irqsave(&engine->timeline->lock, flags);
+       spin_lock(&timeline->lock);
+
        list_for_each_entry_continue(request, &engine->timeline->requests, link)
                if (request->ctx == incomplete_ctx)
                        reset_request(request);
 
-       timeline = i915_gem_context_lookup_timeline(incomplete_ctx, engine);
        list_for_each_entry(request, &timeline->requests, link)
                reset_request(request);
+
+       spin_unlock(&timeline->lock);
+       spin_unlock_irqrestore(&engine->timeline->lock, flags);
 }
 
 void i915_gem_reset(struct drm_i915_private *dev_priv)
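
Note: the request walk now runs under both timeline locks in a fixed nesting order: the engine's timeline lock is the outer one and is taken irqsave (the submission/retirement path can take it from interrupt context), and the per-context timeline lock nests inside with a plain spin_lock() since interrupts are already off. The ordering, with the loop bodies elided:

	unsigned long flags;

	spin_lock_irqsave(&engine->timeline->lock, flags);	/* outer lock */
	spin_lock(&timeline->lock);				/* inner lock */
	/* ... mark all incomplete requests from the hung context ... */
	spin_unlock(&timeline->lock);
	spin_unlock_irqrestore(&engine->timeline->lock, flags);
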
@@ -3503,7 +3486,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
        vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
 
        /* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */
-       if (obj->cache_dirty) {
+       if (obj->cache_dirty || obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
                i915_gem_clflush_object(obj, true);
                intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
        }
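
Note: scanout does not snoop the CPU cache, so before pinning to the display plane the object is flushed not only when cache_dirty is set but also while it is still in the CPU write domain, where dirty cachelines may exist without cache_dirty having been marked. The resulting check, pulled out of the hunk for readability:

	/* Flush if either tracking bit says CPU caches may hold dirty data. */
	if (obj->cache_dirty ||
	    obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
		i915_gem_clflush_object(obj, true);
		intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
	}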