diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index a218c2e395e759e9e3b9c367324311550f4db0a8..b8b877c91b0a9b36b1a9ac318b2e5255b096a46e 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -34,7 +34,6 @@
 #include <drm/i915_drm.h>
 
 #include "i915_drv.h"
-#include "i915_gem_dmabuf.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
 #include "intel_frontbuffer.h"
@@ -288,7 +287,7 @@ static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
        if (DBG_USE_CPU_RELOC)
                return DBG_USE_CPU_RELOC > 0;
 
-       return (HAS_LLC(obj->base.dev) ||
+       return (HAS_LLC(to_i915(obj->base.dev)) ||
                obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
                obj->cache_level != I915_CACHE_NONE);
 }
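
Throughout this patch, feature checks such as HAS_LLC(), IS_GEN6(), HAS_RESOURCE_STREAMER() and INTEL_GEN() switch from taking a struct drm_device * to taking a struct drm_i915_private *, which is why to_i915() wrappers appear at every call site. A minimal sketch of that conversion, assuming drm_i915_private embeds its drm_device as "drm" (the real definition lives in i915_drv.h):

	/* Sketch only: recover the i915 private structure from the embedded DRM device. */
	static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
	{
		return container_of(dev, struct drm_i915_private, drm);
	}
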
@@ -332,7 +331,8 @@ static void reloc_cache_init(struct reloc_cache *cache,
        cache->page = -1;
        cache->vaddr = 0;
        cache->i915 = i915;
-       cache->use_64bit_reloc = INTEL_GEN(cache->i915) >= 8;
+       /* Must be a variable in the struct to allow GCC to unroll. */
+       cache->use_64bit_reloc = HAS_64BIT_RELOC(i915);
        cache->node.allocated = false;
 }
 
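
The removed line spells out what the new predicate stands for: relocation entries are written as 64-bit values on gen8 and later. HAS_64BIT_RELOC() presumably encodes the same information, whether as a device-info flag or as the open-coded check it replaces; a sketch assuming the latter:

	/* Sketch: equivalent to the INTEL_GEN(i915) >= 8 test removed above. */
	#define HAS_64BIT_RELOC(dev_priv) (INTEL_GEN(dev_priv) >= 8)
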
@@ -370,8 +370,7 @@ static void reloc_cache_fini(struct reloc_cache *cache)
 
                        ggtt->base.clear_range(&ggtt->base,
                                               cache->node.start,
-                                              cache->node.size,
-                                              true);
+                                              cache->node.size);
                        drm_mm_remove_node(&cache->node);
                } else {
                        i915_vma_unpin((struct i915_vma *)cache->node.mm);
@@ -419,17 +418,8 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
        unsigned long offset;
        void *vaddr;
 
-       if (cache->node.allocated) {
-               wmb();
-               ggtt->base.insert_page(&ggtt->base,
-                                      i915_gem_object_get_dma_address(obj, page),
-                                      cache->node.start, I915_CACHE_NONE, 0);
-               cache->page = page;
-               return unmask_page(cache->vaddr);
-       }
-
        if (cache->vaddr) {
-               io_mapping_unmap_atomic(unmask_page(cache->vaddr));
+               io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
        } else {
                struct i915_vma *vma;
                int ret;
@@ -467,6 +457,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 
        offset = cache->node.start;
        if (cache->node.allocated) {
+               wmb();
                ggtt->base.insert_page(&ggtt->base,
                                       i915_gem_object_get_dma_address(obj, page),
                                       offset, I915_CACHE_NONE, 0);
@@ -474,7 +465,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
                offset += page << PAGE_SHIFT;
        }
 
-       vaddr = io_mapping_map_atomic_wc(&cache->i915->ggtt.mappable, offset);
+       vaddr = (void __force *) io_mapping_map_atomic_wc(&cache->i915->ggtt.mappable, offset);
        cache->page = page;
        cache->vaddr = (unsigned long)vaddr;
 
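
The relocation cache keeps its current mapping in cache->vaddr, an unsigned long holding either a kmap pointer or an __iomem pointer from io_mapping_map_atomic_wc(), with flag bits stashed in the low, page-offset bits. The added __force casts only tell sparse to ignore the address-space mismatch when that value is stored or recovered; the wmb() now placed in front of insert_page() orders any pending write-combined writes before the GGTT PTE is repointed at the next page. A sketch of the assumed stash/unmask convention (macro bodies are illustrative):

	/* Pointer and flags share one unsigned long; the flags live below PAGE_SIZE. */
	#define unmask_page(p)  ((void *)((unsigned long)(p) & PAGE_MASK))
	#define unmask_flags(p) ((unsigned long)(p) & ~PAGE_MASK)
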
@@ -552,27 +543,13 @@ relocate_entry(struct drm_i915_gem_object *obj,
        return 0;
 }
 
-static bool object_is_idle(struct drm_i915_gem_object *obj)
-{
-       unsigned long active = i915_gem_object_get_active(obj);
-       int idx;
-
-       for_each_active(active, idx) {
-               if (!i915_gem_active_is_idle(&obj->last_read[idx],
-                                            &obj->base.dev->struct_mutex))
-                       return false;
-       }
-
-       return true;
-}
-
 static int
 i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
                                   struct eb_vmas *eb,
                                   struct drm_i915_gem_relocation_entry *reloc,
                                   struct reloc_cache *cache)
 {
-       struct drm_device *dev = obj->base.dev;
+       struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
        struct drm_gem_object *target_obj;
        struct drm_i915_gem_object *target_i915_obj;
        struct i915_vma *target_vma;
@@ -591,7 +568,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
        /* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
         * pipe_control writes because the gpu doesn't properly redirect them
         * through the ppgtt for non_secure batchbuffers. */
-       if (unlikely(IS_GEN6(dev) &&
+       if (unlikely(IS_GEN6(dev_priv) &&
            reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION)) {
                ret = i915_vma_bind(target_vma, target_i915_obj->cache_level,
                                    PIN_GLOBAL);
@@ -649,10 +626,6 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
                return -EINVAL;
        }
 
-       /* We can't wait for rendering with pagefaults disabled */
-       if (pagefault_disabled() && !object_is_idle(obj))
-               return -EFAULT;
-
        ret = relocate_entry(obj, reloc, cache, target_offset);
        if (ret)
                return ret;
@@ -679,12 +652,23 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
        remain = entry->relocation_count;
        while (remain) {
                struct drm_i915_gem_relocation_entry *r = stack_reloc;
-               int count = remain;
-               if (count > ARRAY_SIZE(stack_reloc))
-                       count = ARRAY_SIZE(stack_reloc);
+               unsigned long unwritten;
+               unsigned int count;
+
+               count = min_t(unsigned int, remain, ARRAY_SIZE(stack_reloc));
                remain -= count;
 
-               if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0]))) {
+               /* This is the fast path and we cannot handle a pagefault
+                * whilst holding the struct mutex lest the user pass in the
+                * relocations contained within a mmaped bo. In such a case
+                * the page fault handler would call i915_gem_fault() and we
+                * would try to acquire the struct mutex again. Obviously
+                * this is bad and so lockdep complains vehemently.
+                */
+               pagefault_disable();
+               unwritten = __copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0]));
+               pagefault_enable();
+               if (unlikely(unwritten)) {
                        ret = -EFAULT;
                        goto out;
                }
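
The pattern this hunk moves to is worth spelling out: the user copy is attempted with page faults disabled so that a fault cannot recurse into i915_gem_fault() while struct_mutex is held, and a short copy simply reports -EFAULT so the caller can drop the lock and retry through the faultable slow path (i915_gem_execbuffer_relocate_slow(), later in this file). A minimal sketch, with a hypothetical helper name that is not part of the patch:

	/* Illustrative helper only: copy relocations without risking a recursive fault. */
	static int copy_relocs_atomic(void *dst, const void __user *src, size_t len)
	{
		unsigned long unwritten;

		pagefault_disable();
		unwritten = __copy_from_user_inatomic(dst, src, len);
		pagefault_enable();

		/* A non-zero return means the caller must fall back to a faultable copy. */
		return unwritten ? -EFAULT : 0;
	}
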
@@ -696,11 +680,26 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
                        if (ret)
                                goto out;
 
-                       if (r->presumed_offset != offset &&
-                           __put_user(r->presumed_offset,
-                                      &user_relocs->presumed_offset)) {
-                               ret = -EFAULT;
-                               goto out;
+                       if (r->presumed_offset != offset) {
+                               pagefault_disable();
+                               unwritten = __put_user(r->presumed_offset,
+                                                      &user_relocs->presumed_offset);
+                               pagefault_enable();
+                               if (unlikely(unwritten)) {
+                                       /* Note that reporting an error now
+                                        * leaves everything in an inconsistent
+                                        * state as we have *already* changed
+                                        * the relocation value inside the
+                                        * object. As we have not changed
+                                        * reloc.presumed_offset, nor will we
+                                        * change the execobject.offset, on a
+                                        * later call we may not rewrite the
+                                        * value inside the object, leaving it
+                                        * dangling and causing a GPU hang.
+                                        */
+                                       ret = -EFAULT;
+                                       goto out;
+                               }
                        }
 
                        user_relocs++;
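
For context, the writeback guarded above targets presumed_offset in the userspace relocation entry, which is only a hint that lets future execbuffer calls skip relocations whose target has not moved. The entry layout follows the i915 uapi (field comments are editorial):

	struct drm_i915_gem_relocation_entry {
		__u32 target_handle;	/* object the relocation points at */
		__u32 delta;		/* added to the target's offset when writing the address */
		__u64 offset;		/* byte offset in the batch where the address is written */
		__u64 presumed_offset;	/* userspace's guess at the target's current address */
		__u32 read_domains;
		__u32 write_domain;
	};
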
@@ -740,20 +739,11 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb)
        struct i915_vma *vma;
        int ret = 0;
 
-       /* This is the fast path and we cannot handle a pagefault whilst
-        * holding the struct mutex lest the user pass in the relocations
-        * contained within a mmaped bo. For in such a case we, the page
-        * fault handler would call i915_gem_fault() and we would try to
-        * acquire the struct mutex again. Obviously this is bad and so
-        * lockdep complains vehemently.
-        */
-       pagefault_disable();
        list_for_each_entry(vma, &eb->vmas, exec_list) {
                ret = i915_gem_execbuffer_relocate_vma(vma, eb);
                if (ret)
                        break;
        }
-       pagefault_enable();
 
        return ret;
 }
@@ -843,7 +833,7 @@ need_reloc_mappable(struct i915_vma *vma)
                return false;
 
        /* See also use_cpu_reloc() */
-       if (HAS_LLC(vma->obj->base.dev))
+       if (HAS_LLC(to_i915(vma->obj->base.dev)))
                return false;
 
        if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
@@ -1111,44 +1101,20 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
        return ret;
 }
 
-static unsigned int eb_other_engines(struct drm_i915_gem_request *req)
-{
-       unsigned int mask;
-
-       mask = ~intel_engine_flag(req->engine) & I915_BO_ACTIVE_MASK;
-       mask <<= I915_BO_ACTIVE_SHIFT;
-
-       return mask;
-}
-
 static int
 i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
                                struct list_head *vmas)
 {
-       const unsigned int other_rings = eb_other_engines(req);
        struct i915_vma *vma;
        int ret;
 
        list_for_each_entry(vma, vmas, exec_list) {
                struct drm_i915_gem_object *obj = vma->obj;
-               struct reservation_object *resv;
-
-               if (obj->flags & other_rings) {
-                       ret = i915_gem_request_await_object
-                               (req, obj, obj->base.pending_write_domain);
-                       if (ret)
-                               return ret;
-               }
 
-               resv = i915_gem_object_get_dmabuf_resv(obj);
-               if (resv) {
-                       ret = i915_sw_fence_await_reservation
-                               (&req->submit, resv, &i915_fence_ops,
-                                obj->base.pending_write_domain, 10*HZ,
-                                GFP_KERNEL | __GFP_NOWARN);
-                       if (ret < 0)
-                               return ret;
-               }
+               ret = i915_gem_request_await_object
+                       (req, obj, obj->base.pending_write_domain);
+               if (ret)
+                       return ret;
 
                if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
                        i915_gem_clflush_object(obj, false);
@@ -1215,14 +1181,14 @@ validate_exec_list(struct drm_device *dev,
                        if (exec[i].offset !=
                            gen8_canonical_addr(exec[i].offset & PAGE_MASK))
                                return -EINVAL;
-
-                       /* From drm_mm perspective address space is continuous,
-                        * so from this point we're always using non-canonical
-                        * form internally.
-                        */
-                       exec[i].offset = gen8_noncanonical_addr(exec[i].offset);
                }
 
+               /* From the drm_mm perspective the address space is
+                * continuous, so from this point on we always use the
+                * non-canonical form internally.
+                */
+               exec[i].offset = gen8_noncanonical_addr(exec[i].offset);
+
                if (exec[i].alignment && !is_power_of_2(exec[i].alignment))
                        return -EINVAL;
 
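
The comment above hinges on the difference between the canonical form required on 48-bit platforms (bit 47 sign-extended through the upper bits) and the flat, zero-extended form drm_mm tracks internally. The helpers used here are defined earlier in this file; roughly, as a sketch:

	#define GEN8_HIGH_ADDRESS_BIT 47

	static inline u64 gen8_canonical_addr(u64 address)
	{
		/* Sign-extend bit 47 through the upper bits. */
		return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
	}

	static inline u64 gen8_noncanonical_addr(u64 address)
	{
		/* Drop the sign extension again for drm_mm's flat address space. */
		return address & ((1ULL << (GEN8_HIGH_ADDRESS_BIT + 1)) - 1);
	}
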
@@ -1296,8 +1262,6 @@ void i915_vma_move_to_active(struct i915_vma *vma,
 
        GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 
-       obj->dirty = 1; /* be paranoid  */
-
        /* Add a reference if we're newly entering the active list.
         * The order in which we add operations to the retirement queue is
         * vital here: mark_active adds to the start of the callback list,
@@ -1305,15 +1269,15 @@ void i915_vma_move_to_active(struct i915_vma *vma,
         * add the active reference first and queue for it to be dropped
         * *last*.
         */
-       if (!i915_gem_object_is_active(obj))
-               i915_gem_object_get(obj);
-       i915_gem_object_set_active(obj, idx);
-       i915_gem_active_set(&obj->last_read[idx], req);
+       if (!i915_vma_is_active(vma))
+               obj->active_count++;
+       i915_vma_set_active(vma, idx);
+       i915_gem_active_set(&vma->last_read[idx], req);
+       list_move_tail(&vma->vm_link, &vma->vm->active_list);
 
        if (flags & EXEC_OBJECT_WRITE) {
-               i915_gem_active_set(&obj->last_write, req);
-
-               intel_fb_obj_invalidate(obj, ORIGIN_CS);
+               if (intel_fb_obj_invalidate(obj, ORIGIN_CS))
+                       i915_gem_active_set(&obj->frontbuffer_write, req);
 
                /* update for the implicit flush after a batch */
                obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
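
The add-first/drop-last ordering the comment insists on implies a retire path that mirrors this hunk: only when the last engine and the last VMA of an object go idle is the reference from the first activation dropped. A hedged sketch of that counterpart (the real retire callback lives elsewhere; helper names beyond those in the diff are assumed):

	/* Illustrative mirror image of the activation above, not part of this patch. */
	static void example_vma_retire(struct i915_vma *vma, unsigned int idx)
	{
		struct drm_i915_gem_object *obj = vma->obj;

		i915_vma_clear_active(vma, idx);
		if (i915_vma_is_active(vma))
			return;			/* other engines still using this VMA */

		if (--obj->active_count)
			return;			/* other VMAs of the object still active */

		i915_gem_object_put(obj);	/* drop the reference taken on first activation */
	}
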
@@ -1323,21 +1287,13 @@ void i915_vma_move_to_active(struct i915_vma *vma,
 
        if (flags & EXEC_OBJECT_NEEDS_FENCE)
                i915_gem_active_set(&vma->last_fence, req);
-
-       i915_vma_set_active(vma, idx);
-       i915_gem_active_set(&vma->last_read[idx], req);
-       list_move_tail(&vma->vm_link, &vma->vm->active_list);
 }
 
 static void eb_export_fence(struct drm_i915_gem_object *obj,
                            struct drm_i915_gem_request *req,
                            unsigned int flags)
 {
-       struct reservation_object *resv;
-
-       resv = i915_gem_object_get_dmabuf_resv(obj);
-       if (!resv)
-               return;
+       struct reservation_object *resv = obj->resv;
 
        /* Ignore errors from failing to allocate the new fence, we can't
         * handle an error right now. Worst case should be missed
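
eb_export_fence() now takes the reservation object straight from obj->resv rather than asking the dma-buf layer for it, presumably because every object now carries one, which is also why the NULL check disappears. The truncated comment is about publishing the request's fence through that reservation object; the usual shape of such an export, shown purely as a hedged illustration of what the body below the shown context presumably does, is:

	/* Illustration only: export req->fence as the exclusive or a shared fence. */
	ww_mutex_lock(&resv->lock, NULL);
	if (flags & EXEC_OBJECT_WRITE)
		reservation_object_add_excl_fence(resv, &req->fence);
	else if (reservation_object_reserve_shared(resv) == 0)
		reservation_object_add_shared_fence(resv, &req->fence);
	ww_mutex_unlock(&resv->lock);
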
@@ -1607,12 +1563,12 @@ eb_select_engine(struct drm_i915_private *dev_priv,
                        return NULL;
                }
 
-               engine = &dev_priv->engine[_VCS(bsd_idx)];
+               engine = dev_priv->engine[_VCS(bsd_idx)];
        } else {
-               engine = &dev_priv->engine[user_ring_map[user_ring_id]];
+               engine = dev_priv->engine[user_ring_map[user_ring_id]];
        }
 
-       if (!intel_engine_initialized(engine)) {
+       if (!engine) {
                DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id);
                return NULL;
        }
@@ -1667,7 +1623,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
        }
 
        if (args->flags & I915_EXEC_RESOURCE_STREAMER) {
-               if (!HAS_RESOURCE_STREAMER(dev)) {
+               if (!HAS_RESOURCE_STREAMER(dev_priv)) {
                        DRM_DEBUG("RS is only allowed for Haswell, Gen8 and above\n");
                        return -EINVAL;
                }
@@ -1921,7 +1877,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
                exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
                exec2_list[i].alignment = exec_list[i].alignment;
                exec2_list[i].offset = exec_list[i].offset;
-               if (INTEL_INFO(dev)->gen < 4)
+               if (INTEL_GEN(to_i915(dev)) < 4)
                        exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
                else
                        exec2_list[i].flags = 0;