Merge branches 'pm-core', 'pm-qos', 'pm-domains' and 'pm-opp'

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 00eb4814b9131655b4ac441d7dbabed02c6ccc8e..24b5b046754b37e8b8e3ab2c04a9f890b41eecf9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
 #include <drm/drm_vma_manager.h>
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
-#include "i915_gem_dmabuf.h"
 #include "i915_vgpu.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
 #include "intel_frontbuffer.h"
 #include "intel_mocs.h"
+#include <linux/dma-fence-array.h>
 #include <linux/reservation.h>
 #include <linux/shmem_fs.h>
 #include <linux/slab.h>
 #include <linux/pci.h>
 #include <linux/dma-buf.h>
 
+static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
 
 static bool cpu_cache_is_coherent(struct drm_device *dev,
                                  enum i915_cache_level level)
 {
-       return HAS_LLC(dev) || level != I915_CACHE_NONE;
+       return HAS_LLC(to_i915(dev)) || level != I915_CACHE_NONE;
 }
 
 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
@@ -63,13 +64,13 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
 }
 
 static int
-insert_mappable_node(struct drm_i915_private *i915,
+insert_mappable_node(struct i915_ggtt *ggtt,
                      struct drm_mm_node *node, u32 size)
 {
        memset(node, 0, sizeof(*node));
-       return drm_mm_insert_node_in_range_generic(&i915->ggtt.base.mm, node,
-                                                  size, 0, 0, 0,
-                                                  i915->ggtt.mappable_end,
+       return drm_mm_insert_node_in_range_generic(&ggtt->base.mm, node,
+                                                  size, 0, -1,
+                                                  0, ggtt->mappable_end,
                                                   DRM_MM_SEARCH_DEFAULT,
                                                   DRM_MM_CREATE_DEFAULT);
 }
@@ -82,7 +83,7 @@ remove_mappable_node(struct drm_mm_node *node)
 
 /* some bookkeeping */
 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
-                                 size_t size)
+                                 u64 size)
 {
        spin_lock(&dev_priv->mm.object_stat_lock);
        dev_priv->mm.object_count++;
@@ -91,7 +92,7 @@ static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
 }
 
 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
-                                    size_t size)
+                                    u64 size)
 {
        spin_lock(&dev_priv->mm.object_stat_lock);
        dev_priv->mm.object_count--;
@@ -104,6 +105,8 @@ i915_gem_wait_for_error(struct i915_gpu_error *error)
 {
        int ret;
 
+       might_sleep();
+
        if (!i915_reset_in_progress(error))
                return 0;
 
@@ -114,7 +117,7 @@ i915_gem_wait_for_error(struct i915_gpu_error *error)
         */
        ret = wait_event_interruptible_timeout(error->reset_queue,
                                               !i915_reset_in_progress(error),
-                                              10*HZ);
+                                              I915_RESET_TIMEOUT);
        if (ret == 0) {
                DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
                return -EIO;
@@ -167,25 +170,39 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
        return 0;
 }
 
-static int
+static struct sg_table *
 i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 {
        struct address_space *mapping = obj->base.filp->f_mapping;
-       char *vaddr = obj->phys_handle->vaddr;
+       drm_dma_handle_t *phys;
        struct sg_table *st;
        struct scatterlist *sg;
+       char *vaddr;
        int i;
 
        if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
-               return -EINVAL;
+               return ERR_PTR(-EINVAL);
+
+       /* Always aligning to the object size, allows a single allocation
+        * to handle all possible callers, and given typical object sizes,
+        * the alignment of the buddy allocation will naturally match.
+        */
+       phys = drm_pci_alloc(obj->base.dev,
+                            obj->base.size,
+                            roundup_pow_of_two(obj->base.size));
+       if (!phys)
+               return ERR_PTR(-ENOMEM);
 
+       vaddr = phys->vaddr;
        for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
                struct page *page;
                char *src;
 
                page = shmem_read_mapping_page(mapping, i);
-               if (IS_ERR(page))
-                       return PTR_ERR(page);
+               if (IS_ERR(page)) {
+                       st = ERR_CAST(page);
+                       goto err_phys;
+               }
 
                src = kmap_atomic(page);
                memcpy(vaddr, src, PAGE_SIZE);
@@ -199,44 +216,58 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
        i915_gem_chipset_flush(to_i915(obj->base.dev));
 
        st = kmalloc(sizeof(*st), GFP_KERNEL);
-       if (st == NULL)
-               return -ENOMEM;
+       if (!st) {
+               st = ERR_PTR(-ENOMEM);
+               goto err_phys;
+       }
 
        if (sg_alloc_table(st, 1, GFP_KERNEL)) {
                kfree(st);
-               return -ENOMEM;
+               st = ERR_PTR(-ENOMEM);
+               goto err_phys;
        }
 
        sg = st->sgl;
        sg->offset = 0;
        sg->length = obj->base.size;
 
-       sg_dma_address(sg) = obj->phys_handle->busaddr;
+       sg_dma_address(sg) = phys->busaddr;
        sg_dma_len(sg) = obj->base.size;
 
-       obj->pages = st;
-       return 0;
+       obj->phys_handle = phys;
+       return st;
+
+err_phys:
+       drm_pci_free(obj->base.dev, phys);
+       return st;
 }
 
 static void
-i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
+__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
+                               struct sg_table *pages,
+                               bool needs_clflush)
 {
-       int ret;
+       GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
 
-       BUG_ON(obj->madv == __I915_MADV_PURGED);
+       if (obj->mm.madv == I915_MADV_DONTNEED)
+               obj->mm.dirty = false;
 
-       ret = i915_gem_object_set_to_cpu_domain(obj, true);
-       if (WARN_ON(ret)) {
-               /* In the event of a disaster, abandon all caches and
-                * hope for the best.
-                */
-               obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
-       }
+       if (needs_clflush &&
+           (obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
+           !cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
+               drm_clflush_sg(pages);
+
+       obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+       obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+}
 
-       if (obj->madv == I915_MADV_DONTNEED)
-               obj->dirty = 0;
+static void
+i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
+                              struct sg_table *pages)
+{
+       __i915_gem_object_release_shmem(obj, pages, false);
 
-       if (obj->dirty) {
+       if (obj->mm.dirty) {
                struct address_space *mapping = obj->base.filp->f_mapping;
                char *vaddr = obj->phys_handle->vaddr;
                int i;
@@ -255,22 +286,24 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
                        kunmap_atomic(dst);
 
                        set_page_dirty(page);
-                       if (obj->madv == I915_MADV_WILLNEED)
+                       if (obj->mm.madv == I915_MADV_WILLNEED)
                                mark_page_accessed(page);
                        put_page(page);
                        vaddr += PAGE_SIZE;
                }
-               obj->dirty = 0;
+               obj->mm.dirty = false;
        }
 
-       sg_free_table(obj->pages);
-       kfree(obj->pages);
+       sg_free_table(pages);
+       kfree(pages);
+
+       drm_pci_free(obj->base.dev, obj->phys_handle);
 }
 
 static void
 i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
 {
-       drm_pci_free(obj->base.dev, obj->phys_handle);
+       i915_gem_object_unpin_pages(obj);
 }
 
 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
@@ -292,7 +325,12 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
         * must wait for all rendering to complete to the object (as unbinding
         * must anyway), and retire the requests.
         */
-       ret = i915_gem_object_wait_rendering(obj, false);
+       ret = i915_gem_object_wait(obj,
+                                  I915_WAIT_INTERRUPTIBLE |
+                                  I915_WAIT_LOCKED |
+                                  I915_WAIT_ALL,
+                                  MAX_SCHEDULE_TIMEOUT,
+                                  NULL);
        if (ret)
                return ret;
 
@@ -311,90 +349,209 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
        return ret;
 }
 
-/**
- * Ensures that all rendering to the object has completed and the object is
- * safe to unbind from the GTT or access from the CPU.
- * @obj: i915 gem object
- * @readonly: waiting for just read access or read-write access
- */
-int
-i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
-                              bool readonly)
+static long
+i915_gem_object_wait_fence(struct dma_fence *fence,
+                          unsigned int flags,
+                          long timeout,
+                          struct intel_rps_client *rps)
 {
-       struct reservation_object *resv;
-       struct i915_gem_active *active;
-       unsigned long active_mask;
-       int idx;
+       struct drm_i915_gem_request *rq;
 
-       lockdep_assert_held(&obj->base.dev->struct_mutex);
+       BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);
 
-       if (!readonly) {
-               active = obj->last_read;
-               active_mask = i915_gem_object_get_active(obj);
-       } else {
-               active_mask = 1;
-               active = &obj->last_write;
+       if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+               return timeout;
+
+       if (!dma_fence_is_i915(fence))
+               return dma_fence_wait_timeout(fence,
+                                             flags & I915_WAIT_INTERRUPTIBLE,
+                                             timeout);
+
+       rq = to_request(fence);
+       if (i915_gem_request_completed(rq))
+               goto out;
+
+       /* This client is about to stall waiting for the GPU. In many cases
+        * this is undesirable and limits the throughput of the system, as
+        * many clients cannot continue processing user input/output whilst
+        * blocked. RPS autotuning may take tens of milliseconds to respond
+        * to the GPU load and thus incurs additional latency for the client.
+        * We can circumvent that by promoting the GPU frequency to maximum
+        * before we wait. This makes the GPU throttle up much more quickly
+        * (good for benchmarks and user experience, e.g. window animations),
+        * but at a cost of spending more power processing the workload
+        * (bad for battery). Not all clients even want their results
+        * immediately and for them we should just let the GPU select its own
+        * frequency to maximise efficiency. To prevent a single client from
+        * forcing the clocks too high for the whole system, we only allow
+        * each client to waitboost once in a busy period.
+        */
+       if (rps) {
+               if (INTEL_GEN(rq->i915) >= 6)
+                       gen6_rps_boost(rq->i915, rps, rq->emitted_jiffies);
+               else
+                       rps = NULL;
+       }
+
+       timeout = i915_wait_request(rq, flags, timeout);
+
+out:
+       if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq))
+               i915_gem_request_retire_upto(rq);
+
+       if (rps && rq->global_seqno == intel_engine_last_submit(rq->engine)) {
+               /* The GPU is now idle and this client has stalled.
+                * Since no other client has submitted a request in the
+                * meantime, assume that this client is the only one
+                * supplying work to the GPU but is unable to keep that
+                * work supplied because it is waiting. Since the GPU is
+                * then never kept fully busy, RPS autoclocking will
+                * keep the clocks relatively low, causing further delays.
+                * Compensate by giving the synchronous client credit for
+                * a waitboost next time.
+                */
+               spin_lock(&rq->i915->rps.client_lock);
+               list_del_init(&rps->link);
+               spin_unlock(&rq->i915->rps.client_lock);
        }
 
-       for_each_active(active_mask, idx) {
+       return timeout;
+}
+
+static long
+i915_gem_object_wait_reservation(struct reservation_object *resv,
+                                unsigned int flags,
+                                long timeout,
+                                struct intel_rps_client *rps)
+{
+       struct dma_fence *excl;
+
+       if (flags & I915_WAIT_ALL) {
+               struct dma_fence **shared;
+               unsigned int count, i;
                int ret;
 
-               ret = i915_gem_active_wait(&active[idx],
-                                          &obj->base.dev->struct_mutex);
+               ret = reservation_object_get_fences_rcu(resv,
+                                                       &excl, &count, &shared);
                if (ret)
                        return ret;
-       }
 
-       resv = i915_gem_object_get_dmabuf_resv(obj);
-       if (resv) {
-               long err;
+               for (i = 0; i < count; i++) {
+                       timeout = i915_gem_object_wait_fence(shared[i],
+                                                            flags, timeout,
+                                                            rps);
+                       if (timeout <= 0)
+                               break;
+
+                       dma_fence_put(shared[i]);
+               }
 
-               err = reservation_object_wait_timeout_rcu(resv, !readonly, true,
-                                                         MAX_SCHEDULE_TIMEOUT);
-               if (err < 0)
-                       return err;
+               for (; i < count; i++)
+                       dma_fence_put(shared[i]);
+               kfree(shared);
+       } else {
+               excl = reservation_object_get_excl_rcu(resv);
        }
 
-       return 0;
+       if (excl && timeout > 0)
+               timeout = i915_gem_object_wait_fence(excl, flags, timeout, rps);
+
+       dma_fence_put(excl);
+
+       return timeout;
 }
 
-/* A nonblocking variant of the above wait. Must be called prior to
- * acquiring the mutex for the object, as the object state may change
- * during this call. A reference must be held by the caller for the object.
- */
-static __must_check int
-__unsafe_wait_rendering(struct drm_i915_gem_object *obj,
-                       struct intel_rps_client *rps,
-                       bool readonly)
+static void __fence_set_priority(struct dma_fence *fence, int prio)
 {
-       struct i915_gem_active *active;
-       unsigned long active_mask;
-       int idx;
+       struct drm_i915_gem_request *rq;
+       struct intel_engine_cs *engine;
 
-       active_mask = __I915_BO_ACTIVE(obj);
-       if (!active_mask)
-               return 0;
+       if (!dma_fence_is_i915(fence))
+               return;
+
+       rq = to_request(fence);
+       engine = rq->engine;
+       if (!engine->schedule)
+               return;
+
+       engine->schedule(rq, prio);
+}
 
-       if (!readonly) {
-               active = obj->last_read;
+static void fence_set_priority(struct dma_fence *fence, int prio)
+{
+       /* Recurse once into a fence-array */
+       if (dma_fence_is_array(fence)) {
+               struct dma_fence_array *array = to_dma_fence_array(fence);
+               int i;
+
+               for (i = 0; i < array->num_fences; i++)
+                       __fence_set_priority(array->fences[i], prio);
        } else {
-               active_mask = 1;
-               active = &obj->last_write;
+               __fence_set_priority(fence, prio);
        }
+}
+
+int
+i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
+                             unsigned int flags,
+                             int prio)
+{
+       struct dma_fence *excl;
 
-       for_each_active(active_mask, idx) {
+       if (flags & I915_WAIT_ALL) {
+               struct dma_fence **shared;
+               unsigned int count, i;
                int ret;
 
-               ret = i915_gem_active_wait_unlocked(&active[idx],
-                                                   I915_WAIT_INTERRUPTIBLE,
-                                                   NULL, rps);
+               ret = reservation_object_get_fences_rcu(obj->resv,
+                                                       &excl, &count, &shared);
                if (ret)
                        return ret;
+
+               for (i = 0; i < count; i++) {
+                       fence_set_priority(shared[i], prio);
+                       dma_fence_put(shared[i]);
+               }
+
+               kfree(shared);
+       } else {
+               excl = reservation_object_get_excl_rcu(obj->resv);
        }
 
+       if (excl) {
+               fence_set_priority(excl, prio);
+               dma_fence_put(excl);
+       }
        return 0;
 }
 
+/**
+ * Waits for rendering to the object to be completed
+ * @obj: i915 gem object
+ * @flags: how to wait (under a lock, for all rendering or just for writes etc)
+ * @timeout: how long to wait
+ * @rps: client (user process) to charge for any waitboosting
+ */
+int
+i915_gem_object_wait(struct drm_i915_gem_object *obj,
+                    unsigned int flags,
+                    long timeout,
+                    struct intel_rps_client *rps)
+{
+       might_sleep();
+#if IS_ENABLED(CONFIG_LOCKDEP)
+       GEM_BUG_ON(debug_locks &&
+                  !!lockdep_is_held(&obj->base.dev->struct_mutex) !=
+                  !!(flags & I915_WAIT_LOCKED));
+#endif
+       GEM_BUG_ON(timeout < 0);
+
+       timeout = i915_gem_object_wait_reservation(obj->resv,
+                                                  flags, timeout,
+                                                  rps);
+       return timeout < 0 ? timeout : 0;
+}
+
 static struct intel_rps_client *to_rps_client(struct drm_file *file)
 {
        struct drm_i915_file_private *fpriv = file->driver_priv;
@@ -406,17 +563,15 @@ int
 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
                            int align)
 {
-       drm_dma_handle_t *phys;
        int ret;
 
-       if (obj->phys_handle) {
-               if ((unsigned long)obj->phys_handle->vaddr & (align -1))
-                       return -EBUSY;
+       if (align > obj->base.size)
+               return -EINVAL;
 
+       if (obj->ops == &i915_gem_phys_ops)
                return 0;
-       }
 
-       if (obj->madv != I915_MADV_WILLNEED)
+       if (obj->mm.madv != I915_MADV_WILLNEED)
                return -EFAULT;
 
        if (obj->base.filp == NULL)
@@ -426,61 +581,35 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
        if (ret)
                return ret;
 
-       ret = i915_gem_object_put_pages(obj);
-       if (ret)
-               return ret;
-
-       /* create a new object */
-       phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
-       if (!phys)
-               return -ENOMEM;
+       __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+       if (obj->mm.pages)
+               return -EBUSY;
 
-       obj->phys_handle = phys;
        obj->ops = &i915_gem_phys_ops;
 
-       return i915_gem_object_get_pages(obj);
+       return i915_gem_object_pin_pages(obj);
 }
 
 static int
 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
                     struct drm_i915_gem_pwrite *args,
-                    struct drm_file *file_priv)
+                    struct drm_file *file)
 {
-       struct drm_device *dev = obj->base.dev;
        void *vaddr = obj->phys_handle->vaddr + args->offset;
        char __user *user_data = u64_to_user_ptr(args->data_ptr);
-       int ret = 0;
 
        /* We manually control the domain here and pretend that it
         * remains coherent i.e. in the GTT domain, like shmem_pwrite.
         */
-       ret = i915_gem_object_wait_rendering(obj, false);
-       if (ret)
-               return ret;
-
        intel_fb_obj_invalidate(obj, ORIGIN_CPU);
-       if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
-               unsigned long unwritten;
-
-               /* The physical object once assigned is fixed for the lifetime
-                * of the obj, so we can safely drop the lock and continue
-                * to access vaddr.
-                */
-               mutex_unlock(&dev->struct_mutex);
-               unwritten = copy_from_user(vaddr, user_data, args->size);
-               mutex_lock(&dev->struct_mutex);
-               if (unwritten) {
-                       ret = -EFAULT;
-                       goto out;
-               }
-       }
+       if (copy_from_user(vaddr, user_data, args->size))
+               return -EFAULT;
 
        drm_clflush_virt_range(vaddr, args->size);
-       i915_gem_chipset_flush(to_i915(dev));
+       i915_gem_chipset_flush(to_i915(obj->base.dev));
 
-out:
        intel_fb_obj_flush(obj, false, ORIGIN_CPU);
-       return ret;
+       return 0;
 }
 
 void *i915_gem_object_alloc(struct drm_device *dev)
@@ -516,7 +645,7 @@ i915_gem_create(struct drm_file *file,
 
        ret = drm_gem_handle_create(file, &obj->base, &handle);
        /* drop reference from allocate - handle holds it now */
-       i915_gem_object_put_unlocked(obj);
+       i915_gem_object_put(obj);
        if (ret)
                return ret;
 
@@ -548,6 +677,8 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data,
 {
        struct drm_i915_gem_create *args = data;
 
+       i915_gem_flush_free_objects(to_i915(dev));
+
        return i915_gem_create(file, dev,
                               args->size, &args->handle);
 }
@@ -614,21 +745,24 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 {
        int ret;
 
-       *needs_clflush = 0;
+       lockdep_assert_held(&obj->base.dev->struct_mutex);
 
+       *needs_clflush = 0;
        if (!i915_gem_object_has_struct_page(obj))
                return -ENODEV;
 
-       ret = i915_gem_object_wait_rendering(obj, true);
+       ret = i915_gem_object_wait(obj,
+                                  I915_WAIT_INTERRUPTIBLE |
+                                  I915_WAIT_LOCKED,
+                                  MAX_SCHEDULE_TIMEOUT,
+                                  NULL);
        if (ret)
                return ret;
 
-       ret = i915_gem_object_get_pages(obj);
+       ret = i915_gem_object_pin_pages(obj);
        if (ret)
                return ret;
 
-       i915_gem_object_pin_pages(obj);
-
        i915_gem_object_flush_gtt_write_domain(obj);
 
        /* If we're not in the cpu read domain, set ourself into the gtt
@@ -661,20 +795,25 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
 {
        int ret;
 
+       lockdep_assert_held(&obj->base.dev->struct_mutex);
+
        *needs_clflush = 0;
        if (!i915_gem_object_has_struct_page(obj))
                return -ENODEV;
 
-       ret = i915_gem_object_wait_rendering(obj, false);
+       ret = i915_gem_object_wait(obj,
+                                  I915_WAIT_INTERRUPTIBLE |
+                                  I915_WAIT_LOCKED |
+                                  I915_WAIT_ALL,
+                                  MAX_SCHEDULE_TIMEOUT,
+                                  NULL);
        if (ret)
                return ret;
 
-       ret = i915_gem_object_get_pages(obj);
+       ret = i915_gem_object_pin_pages(obj);
        if (ret)
                return ret;
 
-       i915_gem_object_pin_pages(obj);
-
        i915_gem_object_flush_gtt_write_domain(obj);
 
        /* If we're not in the cpu write domain, set ourself into the
@@ -704,7 +843,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
                obj->cache_dirty = true;
 
        intel_fb_obj_invalidate(obj, ORIGIN_CPU);
-       obj->dirty = 1;
+       obj->mm.dirty = true;
        /* return with the pages pinned */
        return 0;
 
@@ -713,32 +852,6 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
        return ret;
 }
 
-/* Per-page copy function for the shmem pread fastpath.
- * Flushes invalid cachelines before reading the target if
- * needs_clflush is set. */
-static int
-shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
-                char __user *user_data,
-                bool page_do_bit17_swizzling, bool needs_clflush)
-{
-       char *vaddr;
-       int ret;
-
-       if (unlikely(page_do_bit17_swizzling))
-               return -EINVAL;
-
-       vaddr = kmap_atomic(page);
-       if (needs_clflush)
-               drm_clflush_virt_range(vaddr + shmem_page_offset,
-                                      page_length);
-       ret = __copy_to_user_inatomic(user_data,
-                                     vaddr + shmem_page_offset,
-                                     page_length);
-       kunmap_atomic(vaddr);
-
-       return ret ? -EFAULT : 0;
-}
-
 static void
 shmem_clflush_swizzled_range(char *addr, unsigned long length,
                             bool swizzled)
@@ -764,7 +877,7 @@ shmem_clflush_swizzled_range(char *addr, unsigned long length,
 /* Only difference to the fast-path function is that this can handle bit17
  * and uses non-atomic copy and kmap functions. */
 static int
-shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
+shmem_pread_slow(struct page *page, int offset, int length,
                 char __user *user_data,
                 bool page_do_bit17_swizzling, bool needs_clflush)
 {
@@ -773,60 +886,130 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
 
        vaddr = kmap(page);
        if (needs_clflush)
-               shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
-                                            page_length,
+               shmem_clflush_swizzled_range(vaddr + offset, length,
                                             page_do_bit17_swizzling);
 
        if (page_do_bit17_swizzling)
-               ret = __copy_to_user_swizzled(user_data,
-                                             vaddr, shmem_page_offset,
-                                             page_length);
+               ret = __copy_to_user_swizzled(user_data, vaddr, offset, length);
        else
-               ret = __copy_to_user(user_data,
-                                    vaddr + shmem_page_offset,
-                                    page_length);
+               ret = __copy_to_user(user_data, vaddr + offset, length);
        kunmap(page);
 
        return ret ? - EFAULT : 0;
 }
 
-static inline unsigned long
-slow_user_access(struct io_mapping *mapping,
-                uint64_t page_base, int page_offset,
-                char __user *user_data,
-                unsigned long length, bool pwrite)
+static int
+shmem_pread(struct page *page, int offset, int length, char __user *user_data,
+           bool page_do_bit17_swizzling, bool needs_clflush)
+{
+       int ret;
+
+       ret = -ENODEV;
+       if (!page_do_bit17_swizzling) {
+               char *vaddr = kmap_atomic(page);
+
+               if (needs_clflush)
+                       drm_clflush_virt_range(vaddr + offset, length);
+               ret = __copy_to_user_inatomic(user_data, vaddr + offset, length);
+               kunmap_atomic(vaddr);
+       }
+       if (ret == 0)
+               return 0;
+
+       return shmem_pread_slow(page, offset, length, user_data,
+                               page_do_bit17_swizzling, needs_clflush);
+}
+
+static int
+i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
+                    struct drm_i915_gem_pread *args)
+{
+       char __user *user_data;
+       u64 remain;
+       unsigned int obj_do_bit17_swizzling;
+       unsigned int needs_clflush;
+       unsigned int idx, offset;
+       int ret;
+
+       obj_do_bit17_swizzling = 0;
+       if (i915_gem_object_needs_bit17_swizzle(obj))
+               obj_do_bit17_swizzling = BIT(17);
+
+       ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
+       if (ret)
+               return ret;
+
+       ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
+       mutex_unlock(&obj->base.dev->struct_mutex);
+       if (ret)
+               return ret;
+
+       remain = args->size;
+       user_data = u64_to_user_ptr(args->data_ptr);
+       offset = offset_in_page(args->offset);
+       for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
+               struct page *page = i915_gem_object_get_page(obj, idx);
+               int length;
+
+               length = remain;
+               if (offset + length > PAGE_SIZE)
+                       length = PAGE_SIZE - offset;
+
+               ret = shmem_pread(page, offset, length, user_data,
+                                 page_to_phys(page) & obj_do_bit17_swizzling,
+                                 needs_clflush);
+               if (ret)
+                       break;
+
+               remain -= length;
+               user_data += length;
+               offset = 0;
+       }
+
+       i915_gem_obj_finish_shmem_access(obj);
+       return ret;
+}
+
+static inline bool
+gtt_user_read(struct io_mapping *mapping,
+             loff_t base, int offset,
+             char __user *user_data, int length)
 {
-       void __iomem *ioaddr;
        void *vaddr;
-       uint64_t unwritten;
+       unsigned long unwritten;
 
-       ioaddr = io_mapping_map_wc(mapping, page_base, PAGE_SIZE);
        /* We can use the cpu mem copy function because this is X86. */
-       vaddr = (void __force *)ioaddr + page_offset;
-       if (pwrite)
-               unwritten = __copy_from_user(vaddr, user_data, length);
-       else
-               unwritten = __copy_to_user(user_data, vaddr, length);
-
-       io_mapping_unmap(ioaddr);
+       vaddr = (void __force *)io_mapping_map_atomic_wc(mapping, base);
+       unwritten = __copy_to_user_inatomic(user_data, vaddr + offset, length);
+       io_mapping_unmap_atomic(vaddr);
+       if (unwritten) {
+               vaddr = (void __force *)
+                       io_mapping_map_wc(mapping, base, PAGE_SIZE);
+               unwritten = copy_to_user(user_data, vaddr + offset, length);
+               io_mapping_unmap(vaddr);
+       }
        return unwritten;
 }
 
 static int
-i915_gem_gtt_pread(struct drm_device *dev,
-                  struct drm_i915_gem_object *obj, uint64_t size,
-                  uint64_t data_offset, uint64_t data_ptr)
+i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
+                  const struct drm_i915_gem_pread *args)
 {
-       struct drm_i915_private *dev_priv = to_i915(dev);
-       struct i915_ggtt *ggtt = &dev_priv->ggtt;
-       struct i915_vma *vma;
+       struct drm_i915_private *i915 = to_i915(obj->base.dev);
+       struct i915_ggtt *ggtt = &i915->ggtt;
        struct drm_mm_node node;
-       char __user *user_data;
-       uint64_t remain;
-       uint64_t offset;
+       struct i915_vma *vma;
+       void __user *user_data;
+       u64 remain, offset;
        int ret;
 
-       vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
+       ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
+       if (ret)
+               return ret;
+
+       intel_runtime_pm_get(i915);
+       vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
+                                      PIN_MAPPABLE | PIN_NONBLOCK);
        if (!IS_ERR(vma)) {
                node.start = i915_ggtt_offset(vma);
                node.allocated = false;
@@ -837,35 +1020,21 @@ i915_gem_gtt_pread(struct drm_device *dev,
                }
        }
        if (IS_ERR(vma)) {
-               ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE);
+               ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
                if (ret)
-                       goto out;
-
-               ret = i915_gem_object_get_pages(obj);
-               if (ret) {
-                       remove_mappable_node(&node);
-                       goto out;
-               }
-
-               i915_gem_object_pin_pages(obj);
+                       goto out_unlock;
+               GEM_BUG_ON(!node.allocated);
        }
 
        ret = i915_gem_object_set_to_gtt_domain(obj, false);
        if (ret)
                goto out_unpin;
 
-       user_data = u64_to_user_ptr(data_ptr);
-       remain = size;
-       offset = data_offset;
+       mutex_unlock(&i915->drm.struct_mutex);
 
-       mutex_unlock(&dev->struct_mutex);
-       if (likely(!i915.prefault_disable)) {
-               ret = fault_in_pages_writeable(user_data, remain);
-               if (ret) {
-                       mutex_lock(&dev->struct_mutex);
-                       goto out_unpin;
-               }
-       }
+       user_data = u64_to_user_ptr(args->data_ptr);
+       remain = args->size;
+       offset = args->offset;
 
        while (remain > 0) {
                /* Operation in this page
@@ -882,19 +1051,14 @@ i915_gem_gtt_pread(struct drm_device *dev,
                        wmb();
                        ggtt->base.insert_page(&ggtt->base,
                                               i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
-                                              node.start,
-                                              I915_CACHE_NONE, 0);
+                                              node.start, I915_CACHE_NONE, 0);
                        wmb();
                } else {
                        page_base += offset & PAGE_MASK;
                }
-               /* This is a slow read/write as it tries to read from
-                * and write to user memory which may result into page
-                * faults, and so we cannot perform this under struct_mutex.
-                */
-               if (slow_user_access(&ggtt->mappable, page_base,
-                                    page_offset, user_data,
-                                    page_length, false)) {
+
+               if (gtt_user_read(&ggtt->mappable, page_base, page_offset,
+                                 user_data, page_length)) {
                        ret = -EFAULT;
                        break;
                }
@@ -904,111 +1068,19 @@ i915_gem_gtt_pread(struct drm_device *dev,
                offset += page_length;
        }
 
-       mutex_lock(&dev->struct_mutex);
-       if (ret == 0 && (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) {
-               /* The user has modified the object whilst we tried
-                * reading from it, and we now have no idea what domain
-                * the pages should be in. As we have just been touching
-                * them directly, flush everything back to the GTT
-                * domain.
-                */
-               ret = i915_gem_object_set_to_gtt_domain(obj, false);
-       }
-
+       mutex_lock(&i915->drm.struct_mutex);
 out_unpin:
        if (node.allocated) {
                wmb();
                ggtt->base.clear_range(&ggtt->base,
-                                      node.start, node.size,
-                                      true);
-               i915_gem_object_unpin_pages(obj);
+                                      node.start, node.size);
                remove_mappable_node(&node);
        } else {
                i915_vma_unpin(vma);
        }
-out:
-       return ret;
-}
-
-static int
-i915_gem_shmem_pread(struct drm_device *dev,
-                    struct drm_i915_gem_object *obj,
-                    struct drm_i915_gem_pread *args,
-                    struct drm_file *file)
-{
-       char __user *user_data;
-       ssize_t remain;
-       loff_t offset;
-       int shmem_page_offset, page_length, ret = 0;
-       int obj_do_bit17_swizzling, page_do_bit17_swizzling;
-       int prefaulted = 0;
-       int needs_clflush = 0;
-       struct sg_page_iter sg_iter;
-
-       ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
-       if (ret)
-               return ret;
-
-       obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
-       user_data = u64_to_user_ptr(args->data_ptr);
-       offset = args->offset;
-       remain = args->size;
-
-       for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
-                        offset >> PAGE_SHIFT) {
-               struct page *page = sg_page_iter_page(&sg_iter);
-
-               if (remain <= 0)
-                       break;
-
-               /* Operation in this page
-                *
-                * shmem_page_offset = offset within page in shmem file
-                * page_length = bytes to copy for this page
-                */
-               shmem_page_offset = offset_in_page(offset);
-               page_length = remain;
-               if ((shmem_page_offset + page_length) > PAGE_SIZE)
-                       page_length = PAGE_SIZE - shmem_page_offset;
-
-               page_do_bit17_swizzling = obj_do_bit17_swizzling &&
-                       (page_to_phys(page) & (1 << 17)) != 0;
-
-               ret = shmem_pread_fast(page, shmem_page_offset, page_length,
-                                      user_data, page_do_bit17_swizzling,
-                                      needs_clflush);
-               if (ret == 0)
-                       goto next_page;
-
-               mutex_unlock(&dev->struct_mutex);
-
-               if (likely(!i915.prefault_disable) && !prefaulted) {
-                       ret = fault_in_pages_writeable(user_data, remain);
-                       /* Userspace is tricking us, but we've already clobbered
-                        * its pages with the prefault and promised to write the
-                        * data up to the first fault. Hence ignore any errors
-                        * and just continue. */
-                       (void)ret;
-                       prefaulted = 1;
-               }
-
-               ret = shmem_pread_slow(page, shmem_page_offset, page_length,
-                                      user_data, page_do_bit17_swizzling,
-                                      needs_clflush);
-
-               mutex_lock(&dev->struct_mutex);
-
-               if (ret)
-                       goto out;
-
-next_page:
-               remain -= page_length;
-               user_data += page_length;
-               offset += page_length;
-       }
-
-out:
-       i915_gem_obj_finish_shmem_access(obj);
+out_unlock:
+       intel_runtime_pm_put(i915);
+       mutex_unlock(&i915->drm.struct_mutex);
 
        return ret;
 }
@@ -1027,7 +1099,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 {
        struct drm_i915_gem_pread *args = data;
        struct drm_i915_gem_object *obj;
-       int ret = 0;
+       int ret;
 
        if (args->size == 0)
                return 0;
@@ -1045,36 +1117,29 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
        if (args->offset > obj->base.size ||
            args->size > obj->base.size - args->offset) {
                ret = -EINVAL;
-               goto err;
+               goto out;
        }
 
        trace_i915_gem_object_pread(obj, args->offset, args->size);
 
-       ret = __unsafe_wait_rendering(obj, to_rps_client(file), true);
+       ret = i915_gem_object_wait(obj,
+                                  I915_WAIT_INTERRUPTIBLE,
+                                  MAX_SCHEDULE_TIMEOUT,
+                                  to_rps_client(file));
        if (ret)
-               goto err;
+               goto out;
 
-       ret = i915_mutex_lock_interruptible(dev);
+       ret = i915_gem_object_pin_pages(obj);
        if (ret)
-               goto err;
-
-       ret = i915_gem_shmem_pread(dev, obj, args, file);
+               goto out;
 
-       /* pread for non shmem backed objects */
-       if (ret == -EFAULT || ret == -ENODEV) {
-               intel_runtime_pm_get(to_i915(dev));
-               ret = i915_gem_gtt_pread(dev, obj, args->size,
-                                       args->offset, args->data_ptr);
-               intel_runtime_pm_put(to_i915(dev));
-       }
+       ret = i915_gem_shmem_pread(obj, args);
+       if (ret == -EFAULT || ret == -ENODEV)
+               ret = i915_gem_gtt_pread(obj, args);
 
+       i915_gem_object_unpin_pages(obj);
+out:
        i915_gem_object_put(obj);
-       mutex_unlock(&dev->struct_mutex);
-
-       return ret;
-
-err:
-       i915_gem_object_put_unlocked(obj);
        return ret;
 }
 
@@ -1082,51 +1147,52 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
  * page faults in the source data
  */
 
-static inline int
-fast_user_write(struct io_mapping *mapping,
-               loff_t page_base, int page_offset,
-               char __user *user_data,
-               int length)
+static inline bool
+ggtt_write(struct io_mapping *mapping,
+          loff_t base, int offset,
+          char __user *user_data, int length)
 {
-       void __iomem *vaddr_atomic;
        void *vaddr;
        unsigned long unwritten;
 
-       vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
        /* We can use the cpu mem copy function because this is X86. */
-       vaddr = (void __force*)vaddr_atomic + page_offset;
-       unwritten = __copy_from_user_inatomic_nocache(vaddr,
+       vaddr = (void __force *)io_mapping_map_atomic_wc(mapping, base);
+       unwritten = __copy_from_user_inatomic_nocache(vaddr + offset,
                                                      user_data, length);
-       io_mapping_unmap_atomic(vaddr_atomic);
+       io_mapping_unmap_atomic(vaddr);
+       if (unwritten) {
+               vaddr = (void __force *)
+                       io_mapping_map_wc(mapping, base, PAGE_SIZE);
+               unwritten = copy_from_user(vaddr + offset, user_data, length);
+               io_mapping_unmap(vaddr);
+       }
+
        return unwritten;
 }
 
 /**
  * This is the fast pwrite path, where we copy the data directly from the
  * user into the GTT, uncached.
- * @i915: i915 device private data
- * @obj: i915 gem object
+ * @obj: i915 GEM object
  * @args: pwrite arguments structure
- * @file: drm file pointer
  */
 static int
-i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
-                        struct drm_i915_gem_object *obj,
-                        struct drm_i915_gem_pwrite *args,
-                        struct drm_file *file)
+i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
+                        const struct drm_i915_gem_pwrite *args)
 {
+       struct drm_i915_private *i915 = to_i915(obj->base.dev);
        struct i915_ggtt *ggtt = &i915->ggtt;
-       struct drm_device *dev = obj->base.dev;
-       struct i915_vma *vma;
        struct drm_mm_node node;
-       uint64_t remain, offset;
-       char __user *user_data;
+       struct i915_vma *vma;
+       u64 remain, offset;
+       void __user *user_data;
        int ret;
-       bool hit_slow_path = false;
 
-       if (i915_gem_object_is_tiled(obj))
-               return -EFAULT;
+       ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
+       if (ret)
+               return ret;
 
+       intel_runtime_pm_get(i915);
        vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
                                       PIN_MAPPABLE | PIN_NONBLOCK);
        if (!IS_ERR(vma)) {
@@ -1139,25 +1205,19 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
                }
        }
        if (IS_ERR(vma)) {
-               ret = insert_mappable_node(i915, &node, PAGE_SIZE);
+               ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
                if (ret)
-                       goto out;
-
-               ret = i915_gem_object_get_pages(obj);
-               if (ret) {
-                       remove_mappable_node(&node);
-                       goto out;
-               }
-
-               i915_gem_object_pin_pages(obj);
+                       goto out_unlock;
+               GEM_BUG_ON(!node.allocated);
        }
 
        ret = i915_gem_object_set_to_gtt_domain(obj, true);
        if (ret)
                goto out_unpin;
 
+       mutex_unlock(&i915->drm.struct_mutex);
+
        intel_fb_obj_invalidate(obj, ORIGIN_CPU);
-       obj->dirty = true;
 
        user_data = u64_to_user_ptr(args->data_ptr);
        offset = args->offset;
@@ -1170,8 +1230,8 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
                 * page_length = bytes to copy for this page
                 */
                u32 page_base = node.start;
-               unsigned page_offset = offset_in_page(offset);
-               unsigned page_length = PAGE_SIZE - page_offset;
+               unsigned int page_offset = offset_in_page(offset);
+               unsigned int page_length = PAGE_SIZE - page_offset;
                page_length = remain < page_length ? remain : page_length;
                if (node.allocated) {
                        wmb(); /* flush the write before we modify the GGTT */
@@ -1188,92 +1248,36 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
                 * If the object is non-shmem backed, we retry again with the
                 * path that handles page fault.
                 */
-               if (fast_user_write(&ggtt->mappable, page_base,
-                                   page_offset, user_data, page_length)) {
-                       hit_slow_path = true;
-                       mutex_unlock(&dev->struct_mutex);
-                       if (slow_user_access(&ggtt->mappable,
-                                            page_base,
-                                            page_offset, user_data,
-                                            page_length, true)) {
-                               ret = -EFAULT;
-                               mutex_lock(&dev->struct_mutex);
-                               goto out_flush;
-                       }
-
-                       mutex_lock(&dev->struct_mutex);
+               if (ggtt_write(&ggtt->mappable, page_base, page_offset,
+                              user_data, page_length)) {
+                       ret = -EFAULT;
+                       break;
                }
 
                remain -= page_length;
                user_data += page_length;
                offset += page_length;
        }
-
-out_flush:
-       if (hit_slow_path) {
-               if (ret == 0 &&
-                   (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) {
-                       /* The user has modified the object whilst we tried
-                        * reading from it, and we now have no idea what domain
-                        * the pages should be in. As we have just been touching
-                        * them directly, flush everything back to the GTT
-                        * domain.
-                        */
-                       ret = i915_gem_object_set_to_gtt_domain(obj, false);
-               }
-       }
-
        intel_fb_obj_flush(obj, false, ORIGIN_CPU);
+
+       mutex_lock(&i915->drm.struct_mutex);
 out_unpin:
        if (node.allocated) {
                wmb();
                ggtt->base.clear_range(&ggtt->base,
-                                      node.start, node.size,
-                                      true);
-               i915_gem_object_unpin_pages(obj);
+                                      node.start, node.size);
                remove_mappable_node(&node);
        } else {
                i915_vma_unpin(vma);
        }
-out:
+out_unlock:
+       intel_runtime_pm_put(i915);
+       mutex_unlock(&i915->drm.struct_mutex);
        return ret;
 }
 
-/* Per-page copy function for the shmem pwrite fastpath.
- * Flushes invalid cachelines before writing to the target if
- * needs_clflush_before is set and flushes out any written cachelines after
- * writing if needs_clflush is set. */
 static int
-shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
-                 char __user *user_data,
-                 bool page_do_bit17_swizzling,
-                 bool needs_clflush_before,
-                 bool needs_clflush_after)
-{
-       char *vaddr;
-       int ret;
-
-       if (unlikely(page_do_bit17_swizzling))
-               return -EINVAL;
-
-       vaddr = kmap_atomic(page);
-       if (needs_clflush_before)
-               drm_clflush_virt_range(vaddr + shmem_page_offset,
-                                      page_length);
-       ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
-                                       user_data, page_length);
-       if (needs_clflush_after)
-               drm_clflush_virt_range(vaddr + shmem_page_offset,
-                                      page_length);
-       kunmap_atomic(vaddr);
-
-       return ret ? -EFAULT : 0;
-}
-
-/* Only difference to the fast-path function is that this can handle bit17
- * and uses non-atomic copy and kmap functions. */
-static int
-shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
+shmem_pwrite_slow(struct page *page, int offset, int length,
                  char __user *user_data,
                  bool page_do_bit17_swizzling,
                  bool needs_clflush_before,
@@ -1284,124 +1288,114 @@ shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
 
        vaddr = kmap(page);
        if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
-               shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
-                                            page_length,
+               shmem_clflush_swizzled_range(vaddr + offset, length,
                                             page_do_bit17_swizzling);
        if (page_do_bit17_swizzling)
-               ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
-                                               user_data,
-                                               page_length);
+               ret = __copy_from_user_swizzled(vaddr, offset, user_data,
+                                               length);
        else
-               ret = __copy_from_user(vaddr + shmem_page_offset,
-                                      user_data,
-                                      page_length);
+               ret = __copy_from_user(vaddr + offset, user_data, length);
        if (needs_clflush_after)
-               shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
-                                            page_length,
+               shmem_clflush_swizzled_range(vaddr + offset, length,
                                             page_do_bit17_swizzling);
        kunmap(page);
 
        return ret ? -EFAULT : 0;
 }
 
+/* Per-page copy function for the shmem pwrite fastpath.
+ * Flushes invalid cachelines before writing to the target if
+ * needs_clflush_before is set and flushes out any written cachelines after
+ * writing if needs_clflush is set.
+ */
 static int
-i915_gem_shmem_pwrite(struct drm_device *dev,
-                     struct drm_i915_gem_object *obj,
-                     struct drm_i915_gem_pwrite *args,
-                     struct drm_file *file)
+shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
+            bool page_do_bit17_swizzling,
+            bool needs_clflush_before,
+            bool needs_clflush_after)
 {
-       ssize_t remain;
-       loff_t offset;
-       char __user *user_data;
-       int shmem_page_offset, page_length, ret = 0;
-       int obj_do_bit17_swizzling, page_do_bit17_swizzling;
-       int hit_slowpath = 0;
-       unsigned int needs_clflush;
-       struct sg_page_iter sg_iter;
+       int ret;
 
-       ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
-       if (ret)
+       ret = -ENODEV;
+       if (!page_do_bit17_swizzling) {
+               char *vaddr = kmap_atomic(page);
+
+               if (needs_clflush_before)
+                       drm_clflush_virt_range(vaddr + offset, len);
+               ret = __copy_from_user_inatomic(vaddr + offset, user_data, len);
+               if (needs_clflush_after)
+                       drm_clflush_virt_range(vaddr + offset, len);
+
+               kunmap_atomic(vaddr);
+       }
+       if (ret == 0)
                return ret;
 
-       obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
-       user_data = u64_to_user_ptr(args->data_ptr);
-       offset = args->offset;
-       remain = args->size;
+       return shmem_pwrite_slow(page, offset, len, user_data,
+                                page_do_bit17_swizzling,
+                                needs_clflush_before,
+                                needs_clflush_after);
+}
 
-       for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
-                        offset >> PAGE_SHIFT) {
-               struct page *page = sg_page_iter_page(&sg_iter);
-               int partial_cacheline_write;
+static int
+i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
+                     const struct drm_i915_gem_pwrite *args)
+{
+       struct drm_i915_private *i915 = to_i915(obj->base.dev);
+       void __user *user_data;
+       u64 remain;
+       unsigned int obj_do_bit17_swizzling;
+       unsigned int partial_cacheline_write;
+       unsigned int needs_clflush;
+       unsigned int offset, idx;
+       int ret;
 
-               if (remain <= 0)
-                       break;
+       ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
+       if (ret)
+               return ret;
 
-               /* Operation in this page
-                *
-                * shmem_page_offset = offset within page in shmem file
-                * page_length = bytes to copy for this page
-                */
-               shmem_page_offset = offset_in_page(offset);
-
-               page_length = remain;
-               if ((shmem_page_offset + page_length) > PAGE_SIZE)
-                       page_length = PAGE_SIZE - shmem_page_offset;
-
-               /* If we don't overwrite a cacheline completely we need to be
-                * careful to have up-to-date data by first clflushing. Don't
-                * overcomplicate things and flush the entire patch. */
-               partial_cacheline_write = needs_clflush & CLFLUSH_BEFORE &&
-                       ((shmem_page_offset | page_length)
-                               & (boot_cpu_data.x86_clflush_size - 1));
-
-               page_do_bit17_swizzling = obj_do_bit17_swizzling &&
-                       (page_to_phys(page) & (1 << 17)) != 0;
-
-               ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
-                                       user_data, page_do_bit17_swizzling,
-                                       partial_cacheline_write,
-                                       needs_clflush & CLFLUSH_AFTER);
-               if (ret == 0)
-                       goto next_page;
-
-               hit_slowpath = 1;
-               mutex_unlock(&dev->struct_mutex);
-               ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
-                                       user_data, page_do_bit17_swizzling,
-                                       partial_cacheline_write,
-                                       needs_clflush & CLFLUSH_AFTER);
+       ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
+       mutex_unlock(&i915->drm.struct_mutex);
+       if (ret)
+               return ret;
+
+       obj_do_bit17_swizzling = 0;
+       if (i915_gem_object_needs_bit17_swizzle(obj))
+               obj_do_bit17_swizzling = BIT(17);
 
-               mutex_lock(&dev->struct_mutex);
+       /* If we don't overwrite a cacheline completely we need to be
+        * careful to have up-to-date data by first clflushing. Don't
+        * overcomplicate things and flush the entire patch.
+        */
+       partial_cacheline_write = 0;
+       if (needs_clflush & CLFLUSH_BEFORE)
+               partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;
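+       /* With this mask, (offset | length) & partial_cacheline_write is
+        * non-zero for any copy that does not both start and end on a
+        * cacheline boundary, i.e. exactly when the clflush-before is needed.
+        */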
 
+       user_data = u64_to_user_ptr(args->data_ptr);
+       remain = args->size;
+       offset = offset_in_page(args->offset);
+       for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
+               struct page *page = i915_gem_object_get_page(obj, idx);
+               int length;
+
+               length = remain;
+               if (offset + length > PAGE_SIZE)
+                       length = PAGE_SIZE - offset;
+
+               ret = shmem_pwrite(page, offset, length, user_data,
+                                  page_to_phys(page) & obj_do_bit17_swizzling,
+                                  (offset | length) & partial_cacheline_write,
+                                  needs_clflush & CLFLUSH_AFTER);
                if (ret)
-                       goto out;
-
-next_page:
-               remain -= page_length;
-               user_data += page_length;
-               offset += page_length;
-       }
-
-out:
-       i915_gem_obj_finish_shmem_access(obj);
+                       break;
 
-       if (hit_slowpath) {
-               /*
-                * Fixup: Flush cpu caches in case we didn't flush the dirty
-                * cachelines in-line while writing and the object moved
-                * out of the cpu write domain while we've dropped the lock.
-                */
-               if (!(needs_clflush & CLFLUSH_AFTER) &&
-                   obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
-                       if (i915_gem_clflush_object(obj, obj->pin_display))
-                               needs_clflush |= CLFLUSH_AFTER;
-               }
+               remain -= length;
+               user_data += length;
+               offset = 0;
        }
 
-       if (needs_clflush & CLFLUSH_AFTER)
-               i915_gem_chipset_flush(to_i915(dev));
-
        intel_fb_obj_flush(obj, false, ORIGIN_CPU);
+       i915_gem_obj_finish_shmem_access(obj);
        return ret;
 }
 
@@ -1417,7 +1411,6 @@ int
 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
                      struct drm_file *file)
 {
-       struct drm_i915_private *dev_priv = to_i915(dev);
        struct drm_i915_gem_pwrite *args = data;
        struct drm_i915_gem_object *obj;
        int ret;
@@ -1430,13 +1423,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
                       args->size))
                return -EFAULT;
 
-       if (likely(!i915.prefault_disable)) {
-               ret = fault_in_pages_readable(u64_to_user_ptr(args->data_ptr),
-                                                  args->size);
-               if (ret)
-                       return -EFAULT;
-       }
-
        obj = i915_gem_object_lookup(file, args->handle);
        if (!obj)
                return -ENOENT;
@@ -1450,15 +1436,17 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 
        trace_i915_gem_object_pwrite(obj, args->offset, args->size);
 
-       ret = __unsafe_wait_rendering(obj, to_rps_client(file), false);
+       ret = i915_gem_object_wait(obj,
+                                  I915_WAIT_INTERRUPTIBLE |
+                                  I915_WAIT_ALL,
+                                  MAX_SCHEDULE_TIMEOUT,
+                                  to_rps_client(file));
        if (ret)
                goto err;
 
-       intel_runtime_pm_get(dev_priv);
-
-       ret = i915_mutex_lock_interruptible(dev);
+       ret = i915_gem_object_pin_pages(obj);
        if (ret)
-               goto err_rpm;
+               goto err;
 
        ret = -EFAULT;
        /* We can only do the GTT pwrite on untiled buffers, as otherwise
@@ -1468,30 +1456,23 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
         * perspective, requiring manual detiling by the client.
         */
        if (!i915_gem_object_has_struct_page(obj) ||
-           cpu_write_needs_clflush(obj)) {
-               ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);
+           cpu_write_needs_clflush(obj))
                /* Note that the gtt paths might fail with non-page-backed user
                 * pointers (e.g. gtt mappings when moving data between
-                * textures). Fallback to the shmem path in that case. */
-       }
+                * textures). Fall back to the shmem path in that case.
+                */
+               ret = i915_gem_gtt_pwrite_fast(obj, args);
 
        if (ret == -EFAULT || ret == -ENOSPC) {
                if (obj->phys_handle)
                        ret = i915_gem_phys_pwrite(obj, args, file);
                else
-                       ret = i915_gem_shmem_pwrite(dev, obj, args, file);
+                       ret = i915_gem_shmem_pwrite(obj, args);
        }
 
-       i915_gem_object_put(obj);
-       mutex_unlock(&dev->struct_mutex);
-       intel_runtime_pm_put(dev_priv);
-
-       return ret;
-
-err_rpm:
-       intel_runtime_pm_put(dev_priv);
+       i915_gem_object_unpin_pages(obj);
 err:
-       i915_gem_object_put_unlocked(obj);
+       i915_gem_object_put(obj);
        return ret;
 }
 
@@ -1502,6 +1483,30 @@ write_origin(struct drm_i915_gem_object *obj, unsigned domain)
                obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
 }
 
+static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
+{
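+       /* Treat this CPU access as "use": move the object's idle, bound GGTT
+        * VMAs and the object itself to the tails of their LRU lists so they
+        * are not the next eviction or shrinker candidates.
+        */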
+       struct drm_i915_private *i915;
+       struct list_head *list;
+       struct i915_vma *vma;
+
+       list_for_each_entry(vma, &obj->vma_list, obj_link) {
+               if (!i915_vma_is_ggtt(vma))
+                       continue;
+
+               if (i915_vma_is_active(vma))
+                       continue;
+
+               if (!drm_mm_node_allocated(&vma->node))
+                       continue;
+
+               list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
+       }
+
+       i915 = to_i915(obj->base.dev);
+       list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
+       list_move_tail(&obj->global_link, list);
+}
+
 /**
  * Called when user space prepares to use an object with the CPU, either
  * through the mmap ioctl's mapping or a GTT mapping.
@@ -1517,7 +1522,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
        struct drm_i915_gem_object *obj;
        uint32_t read_domains = args->read_domains;
        uint32_t write_domain = args->write_domain;
-       int ret;
+       int err;
 
        /* Only handle setting domains to types used by the CPU. */
        if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
@@ -1537,29 +1542,48 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
         * We will repeat the flush holding the lock in the normal manner
         * to catch cases where we are gazumped.
         */
-       ret = __unsafe_wait_rendering(obj, to_rps_client(file), !write_domain);
-       if (ret)
-               goto err;
+       err = i915_gem_object_wait(obj,
+                                  I915_WAIT_INTERRUPTIBLE |
+                                  (write_domain ? I915_WAIT_ALL : 0),
+                                  MAX_SCHEDULE_TIMEOUT,
+                                  to_rps_client(file));
+       if (err)
+               goto out;
 
-       ret = i915_mutex_lock_interruptible(dev);
-       if (ret)
-               goto err;
+       /* Flush and acquire obj->pages so that direct access to the memory
+        * is coherent with previous cached writes through shmemfs and so
+        * that our cache domain tracking remains valid.
+        * For example, if the obj->filp was moved to swap without us
+        * being notified and releasing the pages, we would mistakenly
+        * continue to assume that the obj remained out of the CPU cached
+        * domain.
+        */
+       err = i915_gem_object_pin_pages(obj);
+       if (err)
+               goto out;
+
+       err = i915_mutex_lock_interruptible(dev);
+       if (err)
+               goto out_unpin;
 
        if (read_domains & I915_GEM_DOMAIN_GTT)
-               ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
+               err = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
        else
-               ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
+               err = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
+
+       /* And bump the LRU for this access */
+       i915_gem_object_bump_inactive_ggtt(obj);
+
+       mutex_unlock(&dev->struct_mutex);
 
        if (write_domain != 0)
                intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
 
+out_unpin:
+       i915_gem_object_unpin_pages(obj);
+out:
        i915_gem_object_put(obj);
-       mutex_unlock(&dev->struct_mutex);
-       return ret;
-
-err:
-       i915_gem_object_put_unlocked(obj);
-       return ret;
+       return err;
 }
 
 /**
@@ -1589,7 +1613,7 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
                }
        }
 
-       i915_gem_object_put_unlocked(obj);
+       i915_gem_object_put(obj);
        return err;
 }
 
@@ -1635,7 +1659,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
         * pages from.
         */
        if (!obj->base.filp) {
-               i915_gem_object_put_unlocked(obj);
+               i915_gem_object_put(obj);
                return -EINVAL;
        }
 
@@ -1647,7 +1671,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
                struct vm_area_struct *vma;
 
                if (down_write_killable(&mm->mmap_sem)) {
-                       i915_gem_object_put_unlocked(obj);
+                       i915_gem_object_put(obj);
                        return -EINTR;
                }
                vma = find_vma(mm, addr);
@@ -1661,7 +1685,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
                /* This may race, but that's ok, it only gets set */
                WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
        }
-       i915_gem_object_put_unlocked(obj);
+       i915_gem_object_put(obj);
        if (IS_ERR((void *)addr))
                return addr;
 
@@ -1763,8 +1787,7 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
        int ret;
 
        /* We don't use vmf->pgoff since that has the fake offset */
-       page_offset = ((unsigned long)vmf->virtual_address - area->vm_start) >>
-               PAGE_SHIFT;
+       page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
 
        trace_i915_gem_object_fault(obj, page_offset, true, write);
 
@@ -1773,7 +1796,14 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
         * repeat the flush holding the lock in the normal manner to catch cases
         * where we are gazumped.
         */
-       ret = __unsafe_wait_rendering(obj, NULL, !write);
+       ret = i915_gem_object_wait(obj,
+                                  I915_WAIT_INTERRUPTIBLE,
+                                  MAX_SCHEDULE_TIMEOUT,
+                                  NULL);
+       if (ret)
+               goto err;
+
+       ret = i915_gem_object_pin_pages(obj);
        if (ret)
                goto err;
 
@@ -1784,7 +1814,7 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
                goto err_rpm;
 
        /* Access to snoopable pages through the GTT is incoherent. */
-       if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
+       if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) {
                ret = -EFAULT;
                goto err_unlock;
        }
@@ -1813,8 +1843,7 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
                view.params.partial.offset = rounddown(page_offset, chunk_size);
                view.params.partial.size =
                        min_t(unsigned int, chunk_size,
-                             (area->vm_end - area->vm_start) / PAGE_SIZE -
-                             view.params.partial.offset);
+                             vma_pages(area) - view.params.partial.offset);
 
                /* If the partial covers the entire object, just create a
                 * normal VMA.
@@ -1842,22 +1871,25 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
        if (ret)
                goto err_unpin;
 
+       /* Mark as being mmapped into userspace for later revocation */
+       assert_rpm_wakelock_held(dev_priv);
+       if (list_empty(&obj->userfault_link))
+               list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
+
        /* Finally, remap it using the new GTT offset */
        ret = remap_io_mapping(area,
                               area->vm_start + (vma->ggtt_view.params.partial.offset << PAGE_SHIFT),
                               (ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT,
                               min_t(u64, vma->size, area->vm_end - area->vm_start),
                               &ggtt->mappable);
-       if (ret)
-               goto err_unpin;
 
-       obj->fault_mappable = true;
 err_unpin:
        __i915_vma_unpin(vma);
 err_unlock:
        mutex_unlock(&dev->struct_mutex);
 err_rpm:
        intel_runtime_pm_put(dev_priv);
+       i915_gem_object_unpin_pages(obj);
 err:
        switch (ret) {
        case -EIO:
@@ -1919,15 +1951,23 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 void
 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 {
+       struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
        /* Serialisation between user GTT access and our code depends upon
         * revoking the CPU's PTE whilst the mutex is held. The next user
         * pagefault then has to wait until we release the mutex.
+        *
+        * Note that RPM complicates this somewhat by adding the additional
+        * requirement that operations on the GGTT be made while holding the
+        * RPM wakeref.
         */
-       lockdep_assert_held(&obj->base.dev->struct_mutex);
+       lockdep_assert_held(&i915->drm.struct_mutex);
+       intel_runtime_pm_get(i915);
 
-       if (!obj->fault_mappable)
-               return;
+       if (list_empty(&obj->userfault_link))
+               goto out;
 
+       list_del_init(&obj->userfault_link);
        drm_vma_node_unmap(&obj->base.vma_node,
                           obj->base.dev->anon_inode->i_mapping);
 
@@ -1940,16 +1980,53 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
         */
        wmb();
 
-       obj->fault_mappable = false;
+out:
+       intel_runtime_pm_put(i915);
 }
 
-void
-i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
+void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
 {
-       struct drm_i915_gem_object *obj;
+       struct drm_i915_gem_object *obj, *on;
+       int i;
 
-       list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
-               i915_gem_release_mmap(obj);
+       /*
+        * Only called during RPM suspend. All users of the userfault_list
+        * must be holding an RPM wakeref to ensure that this cannot run
+        * concurrently with them (they rely on struct_mutex for protection
+        * amongst themselves).
+        */
+
+       list_for_each_entry_safe(obj, on,
+                                &dev_priv->mm.userfault_list, userfault_link) {
+               list_del_init(&obj->userfault_link);
+               drm_vma_node_unmap(&obj->base.vma_node,
+                                  obj->base.dev->anon_inode->i_mapping);
+       }
+
+       /* Fence registers are lost when the device powers down. If any were
+        * in use by hardware (i.e. they are pinned), we should not be powering
+        * down! All other fences will be reacquired by the user upon waking.
+        */
+       for (i = 0; i < dev_priv->num_fence_regs; i++) {
+               struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
+
+               /* Ideally we want to assert that the fence register is not
+                * live at this point (i.e. that no piece of code will be
+                * trying to write through fence + GTT, as that would both
+                * violate our tracking of activity and the associated
+                * locking/barriers, and be illegal given that the hw is
+                * powered down).
+                *
+                * Previously we used reg->pin_count as a "liveness" indicator.
+                * That is not sufficient, and we need a more fine-grained
+                * tool if we want to have a sanity check here.
+                */
+
+               if (!reg->vma)
+                       continue;
+
+               GEM_BUG_ON(!list_empty(&reg->vma->obj->userfault_link));
+               reg->dirty = true;
+       }
 }
 
 /**
@@ -2063,7 +2140,7 @@ i915_gem_mmap_gtt(struct drm_file *file,
        if (ret == 0)
                *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
 
-       i915_gem_object_put_unlocked(obj);
+       i915_gem_object_put(obj);
        return ret;
 }
 
@@ -2106,16 +2183,18 @@ i915_gem_object_truncate(struct drm_i915_gem_object *obj)
         * backing pages, *now*.
         */
        shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
-       obj->madv = __I915_MADV_PURGED;
+       obj->mm.madv = __I915_MADV_PURGED;
 }
 
 /* Try to discard unwanted pages */
-static void
-i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
+void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
 {
        struct address_space *mapping;
 
-       switch (obj->madv) {
+       lockdep_assert_held(&obj->mm.lock);
+       GEM_BUG_ON(obj->mm.pages);
+
+       switch (obj->mm.madv) {
        case I915_MADV_DONTNEED:
                i915_gem_object_truncate(obj);
        case __I915_MADV_PURGED:
@@ -2130,95 +2209,122 @@ i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
 }
 
 static void
-i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
+i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
+                             struct sg_table *pages)
 {
        struct sgt_iter sgt_iter;
        struct page *page;
-       int ret;
 
-       BUG_ON(obj->madv == __I915_MADV_PURGED);
-
-       ret = i915_gem_object_set_to_cpu_domain(obj, true);
-       if (WARN_ON(ret)) {
-               /* In the event of a disaster, abandon all caches and
-                * hope for the best.
-                */
-               i915_gem_clflush_object(obj, true);
-               obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
-       }
+       __i915_gem_object_release_shmem(obj, pages, true);
 
-       i915_gem_gtt_finish_object(obj);
+       i915_gem_gtt_finish_pages(obj, pages);
 
        if (i915_gem_object_needs_bit17_swizzle(obj))
-               i915_gem_object_save_bit_17_swizzle(obj);
-
-       if (obj->madv == I915_MADV_DONTNEED)
-               obj->dirty = 0;
+               i915_gem_object_save_bit_17_swizzle(obj, pages);
 
-       for_each_sgt_page(page, sgt_iter, obj->pages) {
-               if (obj->dirty)
+       for_each_sgt_page(page, sgt_iter, pages) {
+               if (obj->mm.dirty)
                        set_page_dirty(page);
 
-               if (obj->madv == I915_MADV_WILLNEED)
+               if (obj->mm.madv == I915_MADV_WILLNEED)
                        mark_page_accessed(page);
 
                put_page(page);
        }
-       obj->dirty = 0;
+       obj->mm.dirty = false;
 
-       sg_free_table(obj->pages);
-       kfree(obj->pages);
+       sg_free_table(pages);
+       kfree(pages);
 }
 
-int
-i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
+static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
 {
-       const struct drm_i915_gem_object_ops *ops = obj->ops;
+       struct radix_tree_iter iter;
+       void **slot;
 
-       if (obj->pages == NULL)
-               return 0;
+       radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
+               radix_tree_delete(&obj->mm.get_page.radix, iter.index);
+}
 
-       if (obj->pages_pin_count)
-               return -EBUSY;
+void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
+                                enum i915_mm_subclass subclass)
+{
+       struct sg_table *pages;
+
+       if (i915_gem_object_has_pinned_pages(obj))
+               return;
 
        GEM_BUG_ON(obj->bind_count);
+       if (!READ_ONCE(obj->mm.pages))
+               return;
+
+       /* May be called by shrinker from within get_pages() (on another bo) */
+       mutex_lock_nested(&obj->mm.lock, subclass);
+       if (unlikely(atomic_read(&obj->mm.pages_pin_count)))
+               goto unlock;
 
        /* ->put_pages might need to allocate memory for the bit17 swizzle
         * array, hence protect them from being reaped by removing them from gtt
         * lists early. */
-       list_del(&obj->global_list);
+       pages = fetch_and_zero(&obj->mm.pages);
+       GEM_BUG_ON(!pages);
 
-       if (obj->mapping) {
+       if (obj->mm.mapping) {
                void *ptr;
 
-               ptr = ptr_mask_bits(obj->mapping);
+               ptr = ptr_mask_bits(obj->mm.mapping);
                if (is_vmalloc_addr(ptr))
                        vunmap(ptr);
                else
                        kunmap(kmap_to_page(ptr));
 
-               obj->mapping = NULL;
+               obj->mm.mapping = NULL;
        }
 
-       ops->put_pages(obj);
-       obj->pages = NULL;
+       __i915_gem_object_reset_page_iter(obj);
+
+       obj->ops->put_pages(obj, pages);
+unlock:
+       mutex_unlock(&obj->mm.lock);
+}
 
-       i915_gem_object_invalidate(obj);
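+/* Repack an sg_table whose entries were coalesced (nents < orig_nents) into
+ * a table sized for exactly the entries in use, returning the now unused
+ * scatterlist memory to the allocator.
+ */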
+static void i915_sg_trim(struct sg_table *orig_st)
+{
+       struct sg_table new_st;
+       struct scatterlist *sg, *new_sg;
+       unsigned int i;
 
-       return 0;
+       if (orig_st->nents == orig_st->orig_nents)
+               return;
+
+       if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN))
+               return;
+
+       new_sg = new_st.sgl;
+       for_each_sg(orig_st->sgl, sg, orig_st->nents, i) {
+               sg_set_page(new_sg, sg_page(sg), sg->length, 0);
+               /* called before being DMA mapped, no need to copy sg->dma_* */
+               new_sg = sg_next(new_sg);
+       }
+
+       sg_free_table(orig_st);
+
+       *orig_st = new_st;
 }
 
-static int
+static struct sg_table *
 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 {
        struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-       int page_count, i;
+       const unsigned long page_count = obj->base.size / PAGE_SIZE;
+       unsigned long i;
        struct address_space *mapping;
        struct sg_table *st;
        struct scatterlist *sg;
        struct sgt_iter sgt_iter;
        struct page *page;
        unsigned long last_pfn = 0;     /* suppress gcc warning */
+       unsigned int max_segment;
        int ret;
        gfp_t gfp;
 
@@ -2226,17 +2332,21 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
         * wasn't in the GTT, there shouldn't be any way it could have been in
         * a GPU cache
         */
-       BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
-       BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
+       GEM_BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
+       GEM_BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
+
+       max_segment = swiotlb_max_segment();
+       if (!max_segment)
+               max_segment = rounddown(UINT_MAX, PAGE_SIZE);
 
        st = kmalloc(sizeof(*st), GFP_KERNEL);
        if (st == NULL)
-               return -ENOMEM;
+               return ERR_PTR(-ENOMEM);
 
-       page_count = obj->base.size / PAGE_SIZE;
+rebuild_st:
        if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
                kfree(st);
-               return -ENOMEM;
+               return ERR_PTR(-ENOMEM);
        }
 
        /* Get the list of pages out of our struct file.  They'll be pinned
@@ -2264,22 +2374,15 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
                         * our own buffer, now let the real VM do its job and
                         * go down in flames if truly OOM.
                         */
-                       i915_gem_shrink_all(dev_priv);
                        page = shmem_read_mapping_page(mapping, i);
                        if (IS_ERR(page)) {
                                ret = PTR_ERR(page);
                                goto err_sg;
                        }
                }
-#ifdef CONFIG_SWIOTLB
-               if (swiotlb_nr_tbl()) {
-                       st->nents++;
-                       sg_set_page(sg, page, PAGE_SIZE, 0);
-                       sg = sg_next(sg);
-                       continue;
-               }
-#endif
-               if (!i || page_to_pfn(page) != last_pfn + 1) {
+               if (!i ||
+                   sg->length >= max_segment ||
+                   page_to_pfn(page) != last_pfn + 1) {
                        if (i)
                                sg = sg_next(sg);
                        st->nents++;
@@ -2292,24 +2395,37 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
                /* Check that the i965g/gm workaround works. */
                WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
        }
-#ifdef CONFIG_SWIOTLB
-       if (!swiotlb_nr_tbl())
-#endif
+       if (sg) /* loop terminated early; short sg table */
                sg_mark_end(sg);
-       obj->pages = st;
 
-       ret = i915_gem_gtt_prepare_object(obj);
-       if (ret)
-               goto err_pages;
+       /* Trim unused sg entries to avoid wasting memory. */
+       i915_sg_trim(st);
 
-       if (i915_gem_object_needs_bit17_swizzle(obj))
-               i915_gem_object_do_bit_17_swizzle(obj);
+       ret = i915_gem_gtt_prepare_pages(obj, st);
+       if (ret) {
+               /* DMA remapping failed? One possible cause is that
+                * it could not reserve enough large entries; asking
+                * for PAGE_SIZE chunks instead may help.
+                */
+               if (max_segment > PAGE_SIZE) {
+                       for_each_sgt_page(page, sgt_iter, st)
+                               put_page(page);
+                       sg_free_table(st);
 
-       if (i915_gem_object_is_tiled(obj) &&
-           dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
-               i915_gem_object_pin_pages(obj);
+                       max_segment = PAGE_SIZE;
+                       goto rebuild_st;
+               } else {
+                       dev_warn(&dev_priv->drm.pdev->dev,
+                                "Failed to DMA remap %lu pages\n",
+                                page_count);
+                       goto err_pages;
+               }
+       }
 
-       return 0;
+       if (i915_gem_object_needs_bit17_swizzle(obj))
+               i915_gem_object_do_bit_17_swizzle(obj, st);
+
+       return st;
 
 err_sg:
        sg_mark_end(sg);
@@ -2330,43 +2446,73 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
        if (ret == -ENOSPC)
                ret = -ENOMEM;
 
-       return ret;
+       return ERR_PTR(ret);
 }
 
-/* Ensure that the associated pages are gathered from the backing storage
- * and pinned into our object. i915_gem_object_get_pages() may be called
- * multiple times before they are released by a single call to
- * i915_gem_object_put_pages() - once the pages are no longer referenced
- * either as a result of memory pressure (reaping pages under the shrinker)
- * or as the object is itself released.
- */
-int
-i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
+void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
+                                struct sg_table *pages)
 {
-       struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-       const struct drm_i915_gem_object_ops *ops = obj->ops;
-       int ret;
+       lockdep_assert_held(&obj->mm.lock);
 
-       if (obj->pages)
-               return 0;
+       obj->mm.get_page.sg_pos = pages->sgl;
+       obj->mm.get_page.sg_idx = 0;
 
-       if (obj->madv != I915_MADV_WILLNEED) {
+       obj->mm.pages = pages;
+
+       if (i915_gem_object_is_tiled(obj) &&
+           to_i915(obj->base.dev)->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
+               GEM_BUG_ON(obj->mm.quirked);
+               __i915_gem_object_pin_pages(obj);
+               obj->mm.quirked = true;
+       }
+}
+
+static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
+{
+       struct sg_table *pages;
+
+       GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
+
+       if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) {
                DRM_DEBUG("Attempting to obtain a purgeable object\n");
                return -EFAULT;
        }
 
-       BUG_ON(obj->pages_pin_count);
+       pages = obj->ops->get_pages(obj);
+       if (unlikely(IS_ERR(pages)))
+               return PTR_ERR(pages);
 
-       ret = ops->get_pages(obj);
-       if (ret)
-               return ret;
+       __i915_gem_object_set_pages(obj, pages);
+       return 0;
+}
+
+/* Ensure that the associated pages are gathered from the backing storage
+ * and pinned into our object. i915_gem_object_pin_pages() may be called
+ * multiple times before they are released by a single call to
+ * i915_gem_object_unpin_pages() - once the pages are no longer referenced
+ * either as a result of memory pressure (reaping pages under the shrinker)
+ * or as the object is itself released.
+ */
+int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
+{
+       int err;
 
-       list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
+       err = mutex_lock_interruptible(&obj->mm.lock);
+       if (err)
+               return err;
 
-       obj->get_page.sg = obj->pages->sgl;
-       obj->get_page.last = 0;
+       if (unlikely(!obj->mm.pages)) {
+               err = ____i915_gem_object_get_pages(obj);
+               if (err)
+                       goto unlock;
 
-       return 0;
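+               /* Make the newly installed obj->mm.pages visible before the
+                * first increment of pages_pin_count can be observed by
+                * lockless readers of the pin count.
+                */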
+               smp_mb__before_atomic();
+       }
+       atomic_inc(&obj->mm.pages_pin_count);
+
+unlock:
+       mutex_unlock(&obj->mm.lock);
+       return err;
 }
 
 /* The 'mapping' part of i915_gem_object_pin_map() below */
@@ -2374,7 +2520,7 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
                                 enum i915_map_type type)
 {
        unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
-       struct sg_table *sgt = obj->pages;
+       struct sg_table *sgt = obj->mm.pages;
        struct sgt_iter sgt_iter;
        struct page *page;
        struct page *stack_pages[32];
@@ -2425,21 +2571,31 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
        void *ptr;
        int ret;
 
-       lockdep_assert_held(&obj->base.dev->struct_mutex);
        GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
 
-       ret = i915_gem_object_get_pages(obj);
+       ret = mutex_lock_interruptible(&obj->mm.lock);
        if (ret)
                return ERR_PTR(ret);
 
-       i915_gem_object_pin_pages(obj);
-       pinned = obj->pages_pin_count > 1;
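+       /* pinned == true means somebody else already holds a pin on the
+        * pages, in which case an existing mapping of a different type
+        * cannot be discarded below.
+        */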
+       pinned = true;
+       if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
+               if (unlikely(!obj->mm.pages)) {
+                       ret = ____i915_gem_object_get_pages(obj);
+                       if (ret)
+                               goto err_unlock;
+
+                       smp_mb__before_atomic();
+               }
+               atomic_inc(&obj->mm.pages_pin_count);
+               pinned = false;
+       }
+       GEM_BUG_ON(!obj->mm.pages);
 
-       ptr = ptr_unpack_bits(obj->mapping, has_type);
+       ptr = ptr_unpack_bits(obj->mm.mapping, has_type);
        if (ptr && has_type != type) {
                if (pinned) {
                        ret = -EBUSY;
-                       goto err;
+                       goto err_unpin;
                }
 
                if (is_vmalloc_addr(ptr))
@@ -2447,59 +2603,28 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
                else
                        kunmap(kmap_to_page(ptr));
 
-               ptr = obj->mapping = NULL;
+               ptr = obj->mm.mapping = NULL;
        }
 
        if (!ptr) {
                ptr = i915_gem_object_map(obj, type);
                if (!ptr) {
                        ret = -ENOMEM;
-                       goto err;
+                       goto err_unpin;
                }
 
-               obj->mapping = ptr_pack_bits(ptr, type);
+               obj->mm.mapping = ptr_pack_bits(ptr, type);
        }
 
+out_unlock:
+       mutex_unlock(&obj->mm.lock);
        return ptr;
 
-err:
-       i915_gem_object_unpin_pages(obj);
-       return ERR_PTR(ret);
-}
-
-static void
-i915_gem_object_retire__write(struct i915_gem_active *active,
-                             struct drm_i915_gem_request *request)
-{
-       struct drm_i915_gem_object *obj =
-               container_of(active, struct drm_i915_gem_object, last_write);
-
-       intel_fb_obj_flush(obj, true, ORIGIN_CS);
-}
-
-static void
-i915_gem_object_retire__read(struct i915_gem_active *active,
-                            struct drm_i915_gem_request *request)
-{
-       int idx = request->engine->id;
-       struct drm_i915_gem_object *obj =
-               container_of(active, struct drm_i915_gem_object, last_read[idx]);
-
-       GEM_BUG_ON(!i915_gem_object_has_active_engine(obj, idx));
-
-       i915_gem_object_clear_active(obj, idx);
-       if (i915_gem_object_is_active(obj))
-               return;
-
-       /* Bump our place on the bound list to keep it roughly in LRU order
-        * so that we don't steal from recently used but inactive objects
-        * (unless we are forced to ofc!)
-        */
-       if (obj->bind_count)
-               list_move_tail(&obj->global_list,
-                              &request->i915->mm.bound_list);
-
-       i915_gem_object_put(obj);
+err_unpin:
+       atomic_dec(&obj->mm.pages_pin_count);
+err_unlock:
+       ptr = ERR_PTR(ret);
+       goto out_unlock;
 }
 
 static bool i915_context_is_banned(const struct i915_gem_context *ctx)
@@ -2546,13 +2671,10 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
         * extra delay for a recent interrupt is pointless. Hence, we do
         * not need an engine->irq_seqno_barrier() before the seqno reads.
         */
-       list_for_each_entry(request, &engine->request_list, link) {
-               if (i915_gem_request_completed(request))
+       list_for_each_entry(request, &engine->timeline->requests, link) {
+               if (__i915_gem_request_completed(request))
                        continue;
 
-               if (!i915_sw_fence_done(&request->submit))
-                       break;
-
                return request;
        }
 
@@ -2580,10 +2702,10 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine)
 {
        struct drm_i915_gem_request *request;
        struct i915_gem_context *incomplete_ctx;
+       struct intel_timeline *timeline;
+       unsigned long flags;
        bool ring_hung;
 
-       /* Ensure irq handler finishes, and not run again. */
-       tasklet_kill(&engine->irq_tasklet);
        if (engine->irq_seqno_barrier)
                engine->irq_seqno_barrier(engine);
 
@@ -2592,12 +2714,15 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine)
                return;
 
        ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
+       if (engine->hangcheck.seqno != intel_engine_get_seqno(engine))
+               ring_hung = false;
+
        i915_set_reset_status(request->ctx, ring_hung);
        if (!ring_hung)
                return;
 
        DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
-                        engine->name, request->fence.seqno);
+                        engine->name, request->global_seqno);
 
        /* Setup the CS to resume from the breadcrumb of the hung request */
        engine->reset_hw(engine, request);
@@ -2614,21 +2739,35 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine)
        if (i915_gem_context_is_default(incomplete_ctx))
                return;
 
-       list_for_each_entry_continue(request, &engine->request_list, link)
+       timeline = i915_gem_context_lookup_timeline(incomplete_ctx, engine);
+
+       spin_lock_irqsave(&engine->timeline->lock, flags);
+       spin_lock(&timeline->lock);
+
+       list_for_each_entry_continue(request, &engine->timeline->requests, link)
                if (request->ctx == incomplete_ctx)
                        reset_request(request);
+
+       list_for_each_entry(request, &timeline->requests, link)
+               reset_request(request);
+
+       spin_unlock(&timeline->lock);
+       spin_unlock_irqrestore(&engine->timeline->lock, flags);
 }
 
 void i915_gem_reset(struct drm_i915_private *dev_priv)
 {
        struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+
+       lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
        i915_gem_retire_requests(dev_priv);
 
-       for_each_engine(engine, dev_priv)
+       for_each_engine(engine, dev_priv, id)
                i915_gem_reset_engine(engine);
 
-       i915_gem_restore_fences(&dev_priv->drm);
+       i915_gem_restore_fences(dev_priv);
 
        if (dev_priv->gt.awake) {
                intel_sanitize_gt_powersave(dev_priv);
@@ -2640,6 +2779,8 @@ void i915_gem_reset(struct drm_i915_private *dev_priv)
 
 static void nop_submit_request(struct drm_i915_gem_request *request)
 {
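+       /* Complete the request immediately: mark it submitted and advance
+        * the engine's global seqno to this request's, so that it reads back
+        * as completed and any waiters are woken.
+        */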
+       i915_gem_request_submit(request);
+       intel_engine_init_global_seqno(request->engine, request->global_seqno);
 }
 
 static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
@@ -2650,7 +2791,8 @@ static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
         * (lockless) lookup doesn't try and wait upon the request as we
         * reset it.
         */
-       intel_engine_init_seqno(engine, engine->last_submitted_seqno);
+       intel_engine_init_global_seqno(engine,
+                                      intel_engine_last_submit(engine));
 
        /*
         * Clear the execlists queue up before freeing the requests, as those
@@ -2659,26 +2801,30 @@ static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
         */
 
        if (i915.enable_execlists) {
-               spin_lock(&engine->execlist_lock);
-               INIT_LIST_HEAD(&engine->execlist_queue);
+               unsigned long flags;
+
+               spin_lock_irqsave(&engine->timeline->lock, flags);
+
                i915_gem_request_put(engine->execlist_port[0].request);
                i915_gem_request_put(engine->execlist_port[1].request);
                memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
-               spin_unlock(&engine->execlist_lock);
-       }
+               engine->execlist_queue = RB_ROOT;
+               engine->execlist_first = NULL;
 
-       engine->i915->gt.active_engines &= ~intel_engine_flag(engine);
+               spin_unlock_irqrestore(&engine->timeline->lock, flags);
+       }
 }
 
 void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
 {
        struct intel_engine_cs *engine;
+       enum intel_engine_id id;
 
        lockdep_assert_held(&dev_priv->drm.struct_mutex);
        set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
 
        i915_gem_context_lost(dev_priv);
-       for_each_engine(engine, dev_priv)
+       for_each_engine(engine, dev_priv, id)
                i915_gem_cleanup_engine(engine);
        mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
 
@@ -2717,12 +2863,20 @@ i915_gem_idle_work_handler(struct work_struct *work)
                container_of(work, typeof(*dev_priv), gt.idle_work.work);
        struct drm_device *dev = &dev_priv->drm;
        struct intel_engine_cs *engine;
+       enum intel_engine_id id;
        bool rearm_hangcheck;
 
        if (!READ_ONCE(dev_priv->gt.awake))
                return;
 
-       if (READ_ONCE(dev_priv->gt.active_engines))
+       /*
+        * Wait for the last execlists context to complete, but bail out in
+        * case a new request is submitted.
+        */
+       wait_for(READ_ONCE(dev_priv->gt.active_requests) ||
+                intel_execlists_idle(dev_priv), 10);
+
+       if (READ_ONCE(dev_priv->gt.active_requests))
                return;
 
        rearm_hangcheck =
@@ -2736,10 +2890,20 @@ i915_gem_idle_work_handler(struct work_struct *work)
                goto out_rearm;
        }
 
-       if (dev_priv->gt.active_engines)
+       /*
+        * A new request was retired after this work handler started; extend
+        * the active period until the next instance of the work.
+        */
+       if (work_pending(work))
+               goto out_unlock;
+
+       if (dev_priv->gt.active_requests)
                goto out_unlock;
 
-       for_each_engine(engine, dev_priv)
+       if (wait_for(intel_execlists_idle(dev_priv), 10))
+               DRM_ERROR("Timeout waiting for engines to idle\n");
+
+       for_each_engine(engine, dev_priv, id)
                i915_gem_batch_pool_fini(&engine->batch_pool);
 
        GEM_BUG_ON(!dev_priv->gt.awake);
@@ -2769,9 +2933,26 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
        list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link)
                if (vma->vm->file == fpriv)
                        i915_vma_close(vma);
+
+       if (i915_gem_object_is_active(obj) &&
+           !i915_gem_object_has_active_reference(obj)) {
+               i915_gem_object_set_active_reference(obj);
+               i915_gem_object_get(obj);
+       }
        mutex_unlock(&obj->base.dev->struct_mutex);
 }
 
+static unsigned long to_wait_timeout(s64 timeout_ns)
+{
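+       /* Map the ioctl's timeout: negative means wait indefinitely
+        * (MAX_SCHEDULE_TIMEOUT), zero means do not block at all.
+        */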
+       if (timeout_ns < 0)
+               return MAX_SCHEDULE_TIMEOUT;
+
+       if (timeout_ns == 0)
+               return 0;
+
+       return nsecs_to_jiffies_timeout(timeout_ns);
+}
+
 /**
  * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
  * @dev: drm device pointer
@@ -2800,10 +2981,9 @@ int
 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 {
        struct drm_i915_gem_wait *args = data;
-       struct intel_rps_client *rps = to_rps_client(file);
        struct drm_i915_gem_object *obj;
-       unsigned long active;
-       int idx, ret = 0;
+       ktime_t start;
+       long ret;
 
        if (args->flags != 0)
                return -EINVAL;
@@ -2812,133 +2992,29 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
        if (!obj)
                return -ENOENT;
 
-       active = __I915_BO_ACTIVE(obj);
-       for_each_active(active, idx) {
-               s64 *timeout = args->timeout_ns >= 0 ? &args->timeout_ns : NULL;
-               ret = i915_gem_active_wait_unlocked(&obj->last_read[idx],
-                                                   I915_WAIT_INTERRUPTIBLE,
-                                                   timeout, rps);
-               if (ret)
-                       break;
-       }
-
-       i915_gem_object_put_unlocked(obj);
-       return ret;
-}
-
-static void __i915_vma_iounmap(struct i915_vma *vma)
-{
-       GEM_BUG_ON(i915_vma_is_pinned(vma));
-
-       if (vma->iomap == NULL)
-               return;
-
-       io_mapping_unmap(vma->iomap);
-       vma->iomap = NULL;
-}
-
-int i915_vma_unbind(struct i915_vma *vma)
-{
-       struct drm_i915_gem_object *obj = vma->obj;
-       unsigned long active;
-       int ret;
-
-       /* First wait upon any activity as retiring the request may
-        * have side-effects such as unpinning or even unbinding this vma.
-        */
-       active = i915_vma_get_active(vma);
-       if (active) {
-               int idx;
-
-               /* When a closed VMA is retired, it is unbound - eek.
-                * In order to prevent it from being recursively closed,
-                * take a pin on the vma so that the second unbind is
-                * aborted.
-                */
-               __i915_vma_pin(vma);
-
-               for_each_active(active, idx) {
-                       ret = i915_gem_active_retire(&vma->last_read[idx],
-                                                  &vma->vm->dev->struct_mutex);
-                       if (ret)
-                               break;
-               }
-
-               __i915_vma_unpin(vma);
-               if (ret)
-                       return ret;
-
-               GEM_BUG_ON(i915_vma_is_active(vma));
-       }
-
-       if (i915_vma_is_pinned(vma))
-               return -EBUSY;
-
-       if (!drm_mm_node_allocated(&vma->node))
-               goto destroy;
-
-       GEM_BUG_ON(obj->bind_count == 0);
-       GEM_BUG_ON(!obj->pages);
-
-       if (i915_vma_is_map_and_fenceable(vma)) {
-               /* release the fence reg _after_ flushing */
-               ret = i915_vma_put_fence(vma);
-               if (ret)
-                       return ret;
-
-               /* Force a pagefault for domain tracking on next user access */
-               i915_gem_release_mmap(obj);
-
-               __i915_vma_iounmap(vma);
-               vma->flags &= ~I915_VMA_CAN_FENCE;
-       }
-
-       if (likely(!vma->vm->closed)) {
-               trace_i915_vma_unbind(vma);
-               vma->vm->unbind_vma(vma);
-       }
-       vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
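+       /* Record when we started so that a finite timeout can be reduced by
+        * the time actually spent waiting before it is copied back to
+        * userspace.
+        */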
+       start = ktime_get();
 
-       drm_mm_remove_node(&vma->node);
-       list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
+       ret = i915_gem_object_wait(obj,
+                                  I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL,
+                                  to_wait_timeout(args->timeout_ns),
+                                  to_rps_client(file));
 
-       if (vma->pages != obj->pages) {
-               GEM_BUG_ON(!vma->pages);
-               sg_free_table(vma->pages);
-               kfree(vma->pages);
+       if (args->timeout_ns > 0) {
+               args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
+               if (args->timeout_ns < 0)
+                       args->timeout_ns = 0;
        }
-       vma->pages = NULL;
-
-       /* Since the unbound list is global, only move to that list if
-        * no more VMAs exist. */
-       if (--obj->bind_count == 0)
-               list_move_tail(&obj->global_list,
-                              &to_i915(obj->base.dev)->mm.unbound_list);
-
-       /* And finally now the object is completely decoupled from this vma,
-        * we can drop its hold on the backing storage and allow it to be
-        * reaped by the shrinker.
-        */
-       i915_gem_object_unpin_pages(obj);
-
-destroy:
-       if (unlikely(i915_vma_is_closed(vma)))
-               i915_vma_destroy(vma);
 
-       return 0;
+       i915_gem_object_put(obj);
+       return ret;
 }
 
-int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
-                          unsigned int flags)
+static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags)
 {
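+       /* Wait upon the last request submitted to each engine slot of this
+        * timeline.
+        */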
-       struct intel_engine_cs *engine;
-       int ret;
-
-       for_each_engine(engine, dev_priv) {
-               if (engine->last_context == NULL)
-                       continue;
+       int ret, i;
 
-               ret = intel_engine_idle(engine, flags);
+       for (i = 0; i < ARRAY_SIZE(tl->engine); i++) {
+               ret = i915_gem_active_wait(&tl->engine[i].last_request, flags);
                if (ret)
                        return ret;
        }
@@ -2946,187 +3022,45 @@ int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
        return 0;
 }
 
-static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
-                                    unsigned long cache_level)
-{
-       struct drm_mm_node *gtt_space = &vma->node;
-       struct drm_mm_node *other;
-
-       /*
-        * On some machines we have to be careful when putting differing types
-        * of snoopable memory together to avoid the prefetcher crossing memory
-        * domains and dying. During vm initialisation, we decide whether or not
-        * these constraints apply and set the drm_mm.color_adjust
-        * appropriately.
-        */
-       if (vma->vm->mm.color_adjust == NULL)
-               return true;
-
-       if (!drm_mm_node_allocated(gtt_space))
-               return true;
-
-       if (list_empty(&gtt_space->node_list))
-               return true;
-
-       other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
-       if (other->allocated && !other->hole_follows && other->color != cache_level)
-               return false;
-
-       other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
-       if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
-               return false;
-
-       return true;
-}
-
-/**
- * i915_vma_insert - finds a slot for the vma in its address space
- * @vma: the vma
- * @size: requested size in bytes (can be larger than the VMA)
- * @alignment: required alignment
- * @flags: mask of PIN_* flags to use
- *
- * First we try to allocate some free space that meets the requirements for
- * the VMA. Failing that, if the flags permit, it will evict an old VMA,
- * preferably the oldest idle entry to make room for the new VMA.
- *
- * Returns:
- * 0 on success, negative error code otherwise.
- */
-static int
-i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
+int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
 {
-       struct drm_i915_private *dev_priv = to_i915(vma->vm->dev);
-       struct drm_i915_gem_object *obj = vma->obj;
-       u64 start, end;
        int ret;
 
-       GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
-       GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
-
-       size = max(size, vma->size);
-       if (flags & PIN_MAPPABLE)
-               size = i915_gem_get_ggtt_size(dev_priv, size,
-                                             i915_gem_object_get_tiling(obj));
-
-       alignment = max(max(alignment, vma->display_alignment),
-                       i915_gem_get_ggtt_alignment(dev_priv, size,
-                                                   i915_gem_object_get_tiling(obj),
-                                                   flags & PIN_MAPPABLE));
-
-       start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
-
-       end = vma->vm->total;
-       if (flags & PIN_MAPPABLE)
-               end = min_t(u64, end, dev_priv->ggtt.mappable_end);
-       if (flags & PIN_ZONE_4G)
-               end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE);
-
-       /* If binding the object/GGTT view requires more space than the entire
-        * aperture has, reject it early before evicting everything in a vain
-        * attempt to find space.
-        */
-       if (size > end) {
-               DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n",
-                         size, obj->base.size,
-                         flags & PIN_MAPPABLE ? "mappable" : "total",
-                         end);
-               return -E2BIG;
-       }
-
-       ret = i915_gem_object_get_pages(obj);
-       if (ret)
-               return ret;
-
-       i915_gem_object_pin_pages(obj);
+       if (flags & I915_WAIT_LOCKED) {
+               struct i915_gem_timeline *tl;
 
-       if (flags & PIN_OFFSET_FIXED) {
-               u64 offset = flags & PIN_OFFSET_MASK;
-               if (offset & (alignment - 1) || offset > end - size) {
-                       ret = -EINVAL;
-                       goto err_unpin;
-               }
+               lockdep_assert_held(&i915->drm.struct_mutex);
 
-               vma->node.start = offset;
-               vma->node.size = size;
-               vma->node.color = obj->cache_level;
-               ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
-               if (ret) {
-                       ret = i915_gem_evict_for_vma(vma);
-                       if (ret == 0)
-                               ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
+               list_for_each_entry(tl, &i915->gt.timelines, link) {
+                       ret = wait_for_timeline(tl, flags);
                        if (ret)
-                               goto err_unpin;
+                               return ret;
                }
        } else {
-               u32 search_flag, alloc_flag;
-
-               if (flags & PIN_HIGH) {
-                       search_flag = DRM_MM_SEARCH_BELOW;
-                       alloc_flag = DRM_MM_CREATE_TOP;
-               } else {
-                       search_flag = DRM_MM_SEARCH_DEFAULT;
-                       alloc_flag = DRM_MM_CREATE_DEFAULT;
-               }
-
-               /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
-                * so we know that we always have a minimum alignment of 4096.
-                * The drm_mm range manager is optimised to return results
-                * with zero alignment, so where possible use the optimal
-                * path.
-                */
-               if (alignment <= 4096)
-                       alignment = 0;
-
-search_free:
-               ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm,
-                                                         &vma->node,
-                                                         size, alignment,
-                                                         obj->cache_level,
-                                                         start, end,
-                                                         search_flag,
-                                                         alloc_flag);
-               if (ret) {
-                       ret = i915_gem_evict_something(vma->vm, size, alignment,
-                                                      obj->cache_level,
-                                                      start, end,
-                                                      flags);
-                       if (ret == 0)
-                               goto search_free;
-
-                       goto err_unpin;
-               }
+               ret = wait_for_timeline(&i915->gt.global_timeline, flags);
+               if (ret)
+                       return ret;
        }
-       GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level));
-
-       list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
-       list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
-       obj->bind_count++;
 
        return 0;
-
-err_unpin:
-       i915_gem_object_unpin_pages(obj);
-       return ret;
 }
 
-bool
-i915_gem_clflush_object(struct drm_i915_gem_object *obj,
-                       bool force)
+void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
+                            bool force)
 {
        /* If we don't have a page list set up, then we're not pinned
         * to GPU, and we can ignore the cache flush because it'll happen
         * again at bind time.
         */
-       if (obj->pages == NULL)
-               return false;
+       if (!obj->mm.pages)
+               return;
 
        /*
         * Stolen memory is always coherent with the GPU as it is explicitly
         * marked as wc by the system, or the system is cache-coherent.
         */
        if (obj->stolen || obj->phys_handle)
-               return false;
+               return;
 
        /* If the GPU is snooping the contents of the CPU cache,
         * we do not need to manually clear the CPU cache lines.  However,
@@ -3138,14 +3072,12 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
         */
        if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) {
                obj->cache_dirty = true;
-               return false;
+               return;
        }
 
        trace_i915_gem_object_clflush(obj);
-       drm_clflush_sg(obj->pages);
+       drm_clflush_sg(obj->mm.pages);
        obj->cache_dirty = false;
-
-       return true;
 }
 
 /** Flushes the GTT write domain for the object if it's dirty. */
@@ -3174,7 +3106,7 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
         */
        wmb();
        if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv))
-               POSTING_READ(RING_ACTHD(dev_priv->engine[RCS].mmio_base));
+               POSTING_READ(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
 
        intel_fb_obj_flush(obj, false, write_origin(obj, I915_GEM_DOMAIN_GTT));
 
@@ -3191,9 +3123,7 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
        if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
                return;
 
-       if (i915_gem_clflush_object(obj, obj->pin_display))
-               i915_gem_chipset_flush(to_i915(obj->base.dev));
-
+       i915_gem_clflush_object(obj, obj->pin_display);
        intel_fb_obj_flush(obj, false, ORIGIN_CPU);
 
        obj->base.write_domain = 0;
@@ -3202,24 +3132,6 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
                                            I915_GEM_DOMAIN_CPU);
 }
 
-static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
-{
-       struct i915_vma *vma;
-
-       list_for_each_entry(vma, &obj->vma_list, obj_link) {
-               if (!i915_vma_is_ggtt(vma))
-                       continue;
-
-               if (i915_vma_is_active(vma))
-                       continue;
-
-               if (!drm_mm_node_allocated(&vma->node))
-                       continue;
-
-               list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
-       }
-}
-
 /**
  * Moves a single object to the GTT read, and possibly write domain.
  * @obj: object to act on
@@ -3234,7 +3146,14 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
        uint32_t old_write_domain, old_read_domains;
        int ret;
 
-       ret = i915_gem_object_wait_rendering(obj, !write);
+       lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+       ret = i915_gem_object_wait(obj,
+                                  I915_WAIT_INTERRUPTIBLE |
+                                  I915_WAIT_LOCKED |
+                                  (write ? I915_WAIT_ALL : 0),
+                                  MAX_SCHEDULE_TIMEOUT,
+                                  NULL);
        if (ret)
                return ret;
 
@@ -3249,7 +3168,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
         * continue to assume that the obj remained out of the CPU cached
         * domain.
         */
-       ret = i915_gem_object_get_pages(obj);
+       ret = i915_gem_object_pin_pages(obj);
        if (ret)
                return ret;
 
@@ -3268,21 +3187,19 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
        /* It should now be out of any other write domains, and we can update
         * the domain values for our changes.
         */
-       BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
+       GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
        obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
        if (write) {
                obj->base.read_domains = I915_GEM_DOMAIN_GTT;
                obj->base.write_domain = I915_GEM_DOMAIN_GTT;
-               obj->dirty = 1;
+               obj->mm.dirty = true;
        }
 
        trace_i915_gem_object_change_domain(obj,
                                            old_read_domains,
                                            old_write_domain);
 
-       /* And bump the LRU for this access */
-       i915_gem_object_bump_inactive_ggtt(obj);
-
+       i915_gem_object_unpin_pages(obj);
        return 0;
 }
 
@@ -3305,10 +3222,12 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
                                    enum i915_cache_level cache_level)
 {
        struct i915_vma *vma;
-       int ret = 0;
+       int ret;
+
+       lockdep_assert_held(&obj->base.dev->struct_mutex);
 
        if (obj->cache_level == cache_level)
-               goto out;
+               return 0;
 
        /* Inspect the list of currently bound VMA and unbind any that would
         * be invalid given the new cache-level. This is principally to
@@ -3351,11 +3270,17 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
                 * If we wait upon the object, we know that all the bound
                 * VMA are no longer active.
                 */
-               ret = i915_gem_object_wait_rendering(obj, false);
+               ret = i915_gem_object_wait(obj,
+                                          I915_WAIT_INTERRUPTIBLE |
+                                          I915_WAIT_LOCKED |
+                                          I915_WAIT_ALL,
+                                          MAX_SCHEDULE_TIMEOUT,
+                                          NULL);
                if (ret)
                        return ret;
 
-               if (!HAS_LLC(obj->base.dev) && cache_level != I915_CACHE_NONE) {
+               if (!HAS_LLC(to_i915(obj->base.dev)) &&
+                   cache_level != I915_CACHE_NONE) {
                        /* Access to snoopable pages through the GTT is
                         * incoherent and on some machines causes a hard
                         * lockup. Relinquish the CPU mmaping to force
@@ -3397,20 +3322,14 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
                }
        }
 
+       if (obj->base.write_domain == I915_GEM_DOMAIN_CPU &&
+           cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
+               obj->cache_dirty = true;
+
        list_for_each_entry(vma, &obj->vma_list, obj_link)
                vma->node.color = cache_level;
        obj->cache_level = cache_level;
 
-out:
-       /* Flush the dirty CPU caches to the backing storage so that the
-        * object is now coherent at its new cache level (with respect
-        * to the access domain).
-        */
-       if (obj->cache_dirty && cpu_write_needs_clflush(obj)) {
-               if (i915_gem_clflush_object(obj, true))
-                       i915_gem_chipset_flush(to_i915(obj->base.dev));
-       }
-
        return 0;
 }
 
@@ -3419,10 +3338,14 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
 {
        struct drm_i915_gem_caching *args = data;
        struct drm_i915_gem_object *obj;
+       int err = 0;
 
-       obj = i915_gem_object_lookup(file, args->handle);
-       if (!obj)
-               return -ENOENT;
+       rcu_read_lock();
+       obj = i915_gem_object_lookup_rcu(file, args->handle);
+       if (!obj) {
+               err = -ENOENT;
+               goto out;
+       }
 
        switch (obj->cache_level) {
        case I915_CACHE_LLC:
@@ -3438,15 +3361,15 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
                args->caching = I915_CACHING_NONE;
                break;
        }
-
-       i915_gem_object_put_unlocked(obj);
-       return 0;
+out:
+       rcu_read_unlock();
+       return err;
 }
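
An aside on the change above: the get-caching path drops the reference-counted
handle lookup in favour of a peek under rcu_read_lock(), since the ioctl only
copies out one scalar before leaving the read-side section. A minimal
standalone sketch of that shape follows. It is not driver code:
read_lock()/read_unlock() and lookup_object_locked() are hypothetical stand-ins
for rcu_read_lock()/rcu_read_unlock() and i915_gem_object_lookup_rcu().

#include <stdio.h>

struct object { unsigned int cache_level; };

static struct object table[] = { { 1 }, { 0 }, { 2 } };

/* Hypothetical stand-ins for rcu_read_lock()/rcu_read_unlock(). */
static void read_lock(void) { }
static void read_unlock(void) { }

/* Hypothetical stand-in for i915_gem_object_lookup_rcu(). */
static struct object *lookup_object_locked(unsigned int handle)
{
	return handle < 3 ? &table[handle] : NULL;
}

static int get_caching(unsigned int handle, unsigned int *caching)
{
	int err = 0;
	struct object *obj;

	read_lock();
	obj = lookup_object_locked(handle);
	if (!obj)
		err = -1;			/* -ENOENT in the ioctl */
	else
		*caching = obj->cache_level;	/* copy out before unlock */
	read_unlock();

	return err;
}

int main(void)
{
	unsigned int caching;

	if (get_caching(2, &caching) == 0)
		printf("caching = %u\n", caching);
	return 0;
}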
 
 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
                               struct drm_file *file)
 {
-       struct drm_i915_private *dev_priv = to_i915(dev);
+       struct drm_i915_private *i915 = to_i915(dev);
        struct drm_i915_gem_caching *args = data;
        struct drm_i915_gem_object *obj;
        enum i915_cache_level level;
@@ -3463,23 +3386,21 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
                 * cacheline, whereas normally such cachelines would get
                 * invalidated.
                 */
-               if (!HAS_LLC(dev) && !HAS_SNOOP(dev))
+               if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
                        return -ENODEV;
 
                level = I915_CACHE_LLC;
                break;
        case I915_CACHING_DISPLAY:
-               level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE;
+               level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
                break;
        default:
                return -EINVAL;
        }
 
-       intel_runtime_pm_get(dev_priv);
-
        ret = i915_mutex_lock_interruptible(dev);
        if (ret)
-               goto rpm_put;
+               return ret;
 
        obj = i915_gem_object_lookup(file, args->handle);
        if (!obj) {
@@ -3488,13 +3409,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
        }
 
        ret = i915_gem_object_set_cache_level(obj, level);
-
        i915_gem_object_put(obj);
 unlock:
        mutex_unlock(&dev->struct_mutex);
-rpm_put:
-       intel_runtime_pm_put(dev_priv);
-
        return ret;
 }
 
@@ -3512,6 +3429,8 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
        u32 old_read_domains, old_write_domain;
        int ret;
 
+       lockdep_assert_held(&obj->base.dev->struct_mutex);
+
        /* Mark the pin_display early so that we account for the
         * display coherency whilst setting up the cache domains.
         */
@@ -3527,7 +3446,8 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
         * with that bit in the PTE to main memory with just one PIPE_CONTROL.
         */
        ret = i915_gem_object_set_cache_level(obj,
-                                             HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
+                                             HAS_WT(to_i915(obj->base.dev)) ?
+                                             I915_CACHE_WT : I915_CACHE_NONE);
        if (ret) {
                vma = ERR_PTR(ret);
                goto err_unpin_display;
@@ -3565,7 +3485,11 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 
        vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
 
-       i915_gem_object_flush_cpu_write_domain(obj);
+       /* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */
+       if (obj->cache_dirty || obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
+               i915_gem_clflush_object(obj, true);
+               intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
+       }
 
        old_write_domain = obj->base.write_domain;
        old_read_domains = obj->base.read_domains;
@@ -3590,6 +3514,8 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 void
 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
 {
+       lockdep_assert_held(&vma->vm->dev->struct_mutex);
+
        if (WARN_ON(vma->obj->pin_display == 0))
                return;
 
@@ -3617,7 +3543,14 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
        uint32_t old_write_domain, old_read_domains;
        int ret;
 
-       ret = i915_gem_object_wait_rendering(obj, !write);
+       lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+       ret = i915_gem_object_wait(obj,
+                                  I915_WAIT_INTERRUPTIBLE |
+                                  I915_WAIT_LOCKED |
+                                  (write ? I915_WAIT_ALL : 0),
+                                  MAX_SCHEDULE_TIMEOUT,
+                                  NULL);
        if (ret)
                return ret;
 
@@ -3639,7 +3572,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
        /* It should now be out of any other write domains, and we can update
         * the domain values for our changes.
         */
-       BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
+       GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
 
        /* If we're writing through the CPU, then the GPU read domains will
         * need to be invalidated at next use.
@@ -3673,11 +3606,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
        struct drm_i915_file_private *file_priv = file->driver_priv;
        unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
        struct drm_i915_gem_request *request, *target = NULL;
-       int ret;
-
-       ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
-       if (ret)
-               return ret;
+       long ret;
 
        /* ABI: return -EIO if already wedged */
        if (i915_terminally_wedged(&dev_priv->gpu_error))
@@ -3691,116 +3620,25 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
                /*
                 * Note that the request might not have been submitted yet.
                 * In which case emitted_jiffies will be zero.
-                */
-               if (!request->emitted_jiffies)
-                       continue;
-
-               target = request;
-       }
-       if (target)
-               i915_gem_request_get(target);
-       spin_unlock(&file_priv->mm.lock);
-
-       if (target == NULL)
-               return 0;
-
-       ret = i915_wait_request(target, I915_WAIT_INTERRUPTIBLE, NULL, NULL);
-       i915_gem_request_put(target);
-
-       return ret;
-}
-
-static bool
-i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
-{
-       if (!drm_mm_node_allocated(&vma->node))
-               return false;
-
-       if (vma->node.size < size)
-               return true;
-
-       if (alignment && vma->node.start & (alignment - 1))
-               return true;
-
-       if (flags & PIN_MAPPABLE && !i915_vma_is_map_and_fenceable(vma))
-               return true;
-
-       if (flags & PIN_OFFSET_BIAS &&
-           vma->node.start < (flags & PIN_OFFSET_MASK))
-               return true;
-
-       if (flags & PIN_OFFSET_FIXED &&
-           vma->node.start != (flags & PIN_OFFSET_MASK))
-               return true;
-
-       return false;
-}
-
-void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
-{
-       struct drm_i915_gem_object *obj = vma->obj;
-       struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-       bool mappable, fenceable;
-       u32 fence_size, fence_alignment;
-
-       fence_size = i915_gem_get_ggtt_size(dev_priv,
-                                           vma->size,
-                                           i915_gem_object_get_tiling(obj));
-       fence_alignment = i915_gem_get_ggtt_alignment(dev_priv,
-                                                     vma->size,
-                                                     i915_gem_object_get_tiling(obj),
-                                                     true);
-
-       fenceable = (vma->node.size == fence_size &&
-                    (vma->node.start & (fence_alignment - 1)) == 0);
-
-       mappable = (vma->node.start + fence_size <=
-                   dev_priv->ggtt.mappable_end);
-
-       /*
-        * Explicitly disable for rotated VMA since the display does not
-        * need the fence and the VMA is not accessible to other users.
-        */
-       if (mappable && fenceable &&
-           vma->ggtt_view.type != I915_GGTT_VIEW_ROTATED)
-               vma->flags |= I915_VMA_CAN_FENCE;
-       else
-               vma->flags &= ~I915_VMA_CAN_FENCE;
-}
-
-int __i915_vma_do_pin(struct i915_vma *vma,
-                     u64 size, u64 alignment, u64 flags)
-{
-       unsigned int bound = vma->flags;
-       int ret;
-
-       GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
-       GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma));
-
-       if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) {
-               ret = -EBUSY;
-               goto err;
-       }
+                */
+               if (!request->emitted_jiffies)
+                       continue;
 
-       if ((bound & I915_VMA_BIND_MASK) == 0) {
-               ret = i915_vma_insert(vma, size, alignment, flags);
-               if (ret)
-                       goto err;
+               target = request;
        }
+       if (target)
+               i915_gem_request_get(target);
+       spin_unlock(&file_priv->mm.lock);
 
-       ret = i915_vma_bind(vma, vma->obj->cache_level, flags);
-       if (ret)
-               goto err;
-
-       if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND)
-               __i915_vma_set_map_and_fenceable(vma);
+       if (target == NULL)
+               return 0;
 
-       GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
-       return 0;
+       ret = i915_wait_request(target,
+                               I915_WAIT_INTERRUPTIBLE,
+                               MAX_SCHEDULE_TIMEOUT);
+       i915_gem_request_put(target);
 
-err:
-       __i915_vma_unpin(vma);
-       return ret;
+       return ret < 0 ? ret : 0;
 }
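
For context, the throttle path above picks the most recent request the client
submitted before a roughly 20 ms window and waits on it, so a client cannot
queue unboundedly far ahead of the GPU. Below is a standalone sketch of just
the selection step, not driver code: submission times are plain integers and
the 20 ms window is an assumed constant.

#include <stdio.h>

#define THROTTLE_WINDOW 20	/* stands in for DRM_I915_THROTTLE_JIFFIES */

/* Return the index of the newest request submitted strictly before
 * (now - window); that is the request the caller would wait on.
 * Returns -1 if every request is still inside the window.
 */
static int pick_throttle_target(const unsigned long *emitted, int count,
				unsigned long now)
{
	unsigned long recent_enough = now - THROTTLE_WINDOW;
	int target = -1;

	for (int i = 0; i < count; i++) {
		if (emitted[i] >= recent_enough)
			break;		/* this and the rest are too recent */
		if (!emitted[i])
			continue;	/* not yet submitted */
		target = i;
	}

	return target;
}

int main(void)
{
	unsigned long emitted[] = { 100, 110, 130, 145 };

	printf("wait on request %d\n",
	       pick_throttle_target(emitted, 4, 150));
	return 0;
}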
 
 struct i915_vma *
@@ -3810,10 +3648,13 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
                         u64 alignment,
                         u64 flags)
 {
-       struct i915_address_space *vm = &to_i915(obj->base.dev)->ggtt.base;
+       struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+       struct i915_address_space *vm = &dev_priv->ggtt.base;
        struct i915_vma *vma;
        int ret;
 
+       lockdep_assert_held(&obj->base.dev->struct_mutex);
+
        vma = i915_gem_obj_lookup_or_create_vma(obj, vm, view);
        if (IS_ERR(vma))
                return vma;
@@ -3823,6 +3664,41 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
                    (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)))
                        return ERR_PTR(-ENOSPC);
 
+               if (flags & PIN_MAPPABLE) {
+                       u32 fence_size;
+
+                       fence_size = i915_gem_get_ggtt_size(dev_priv, vma->size,
+                                                           i915_gem_object_get_tiling(obj));
+                       /* If the required space is larger than the available
+                        * aperture, we will not be able to find a slot for the
+                        * object and unbinding the object now will be in
+                        * vain. Worse, doing so may cause us to ping-pong
+                        * the object in and out of the Global GTT and
+                        * waste a lot of cycles under the mutex.
+                        */
+                       if (fence_size > dev_priv->ggtt.mappable_end)
+                               return ERR_PTR(-E2BIG);
+
+                       /* If NONBLOCK is set the caller is optimistically
+                        * trying to cache the full object within the mappable
+                        * aperture, and *must* have a fallback in place for
+                        * situations where we cannot bind the object. We
+                        * can be a little more lax here and use the fallback
+                        * more often to avoid costly migrations of ourselves
+                        * and other objects within the aperture.
+                        *
+                        * Half-the-aperture is used as a simple heuristic.
+                        * More interesting would be to search for a free
+                        * block prior to making the commitment to unbind.
+                        * That caters for the self-harm case, and with a
+                        * little more heuristics (e.g. NOFAULT, NOEVICT)
+                        * we could try to minimise harm to others.
+                        */
+                       if (flags & PIN_NONBLOCK &&
+                           fence_size > dev_priv->ggtt.mappable_end / 2)
+                               return ERR_PTR(-ENOSPC);
+               }
+
                WARN(i915_vma_is_pinned(vma),
                     "bo is already pinned in ggtt with incorrect alignment:"
                     " offset=%08x, req.alignment=%llx,"
@@ -3869,83 +3745,42 @@ static __always_inline unsigned int __busy_write_id(unsigned int id)
 }
 
 static __always_inline unsigned int
-__busy_set_if_active(const struct i915_gem_active *active,
+__busy_set_if_active(const struct dma_fence *fence,
                     unsigned int (*flag)(unsigned int id))
 {
-       struct drm_i915_gem_request *request;
-
-       request = rcu_dereference(active->request);
-       if (!request || i915_gem_request_completed(request))
-               return 0;
+       struct drm_i915_gem_request *rq;
 
-       /* This is racy. See __i915_gem_active_get_rcu() for an in detail
-        * discussion of how to handle the race correctly, but for reporting
-        * the busy state we err on the side of potentially reporting the
-        * wrong engine as being busy (but we guarantee that the result
-        * is at least self-consistent).
-        *
-        * As we use SLAB_DESTROY_BY_RCU, the request may be reallocated
-        * whilst we are inspecting it, even under the RCU read lock as we are.
-        * This means that there is a small window for the engine and/or the
-        * seqno to have been overwritten. The seqno will always be in the
-        * future compared to the intended, and so we know that if that
-        * seqno is idle (on whatever engine) our request is idle and the
-        * return 0 above is correct.
-        *
-        * The issue is that if the engine is switched, it is just as likely
-        * to report that it is busy (but since the switch happened, we know
-        * the request should be idle). So there is a small chance that a busy
-        * result is actually the wrong engine.
-        *
-        * So why don't we care?
-        *
-        * For starters, the busy ioctl is a heuristic that is by definition
-        * racy. Even with perfect serialisation in the driver, the hardware
-        * state is constantly advancing - the state we report to the user
-        * is stale.
+       /* We have to check the current hw status of the fence as the uABI
+        * guarantees forward progress. We could rely on the idle worker
+        * to eventually flush us, but to minimise latency just ask the
+        * hardware.
         *
-        * The critical information for the busy-ioctl is whether the object
-        * is idle as userspace relies on that to detect whether its next
-        * access will stall, or if it has missed submitting commands to
-        * the hardware allowing the GPU to stall. We never generate a
-        * false-positive for idleness, thus busy-ioctl is reliable at the
-        * most fundamental level, and we maintain the guarantee that a
-        * busy object left to itself will eventually become idle (and stay
-        * idle!).
-        *
-        * We allow ourselves the leeway of potentially misreporting the busy
-        * state because that is an optimisation heuristic that is constantly
-        * in flux. Being quickly able to detect the busy/idle state is much
-        * more important than accurate logging of exactly which engines were
-        * busy.
-        *
-        * For accuracy in reporting the engine, we could use
-        *
-        *      result = 0;
-        *      request = __i915_gem_active_get_rcu(active);
-        *      if (request) {
-        *              if (!i915_gem_request_completed(request))
-        *                      result = flag(request->engine->exec_id);
-        *              i915_gem_request_put(request);
-        *      }
-        *
-        * but that still remains susceptible to both hardware and userspace
-        * races. So we accept making the result of that race slightly worse,
-        * given the rarity of the race and its low impact on the result.
+        * Note we only report on the status of native fences.
         */
-       return flag(READ_ONCE(request->engine->exec_id));
+       if (!dma_fence_is_i915(fence))
+               return 0;
+
+       /* opencode to_request() in order to avoid const warnings */
+       rq = container_of(fence, struct drm_i915_gem_request, fence);
+       if (i915_gem_request_completed(rq))
+               return 0;
+
+       return flag(rq->engine->exec_id);
 }
 
 static __always_inline unsigned int
-busy_check_reader(const struct i915_gem_active *active)
+busy_check_reader(const struct dma_fence *fence)
 {
-       return __busy_set_if_active(active, __busy_read_flag);
+       return __busy_set_if_active(fence, __busy_read_flag);
 }
 
 static __always_inline unsigned int
-busy_check_writer(const struct i915_gem_active *active)
+busy_check_writer(const struct dma_fence *fence)
 {
-       return __busy_set_if_active(active, __busy_write_id);
+       if (!fence)
+               return 0;
+
+       return __busy_set_if_active(fence, __busy_write_id);
 }
 
 int
@@ -3954,64 +3789,58 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 {
        struct drm_i915_gem_busy *args = data;
        struct drm_i915_gem_object *obj;
-       unsigned long active;
+       struct reservation_object_list *list;
+       unsigned int seq;
+       int err;
 
-       obj = i915_gem_object_lookup(file, args->handle);
+       err = -ENOENT;
+       rcu_read_lock();
+       obj = i915_gem_object_lookup_rcu(file, args->handle);
        if (!obj)
-               return -ENOENT;
+               goto out;
 
-       args->busy = 0;
-       active = __I915_BO_ACTIVE(obj);
-       if (active) {
-               int idx;
+       /* A discrepancy here is that we do not report the status of
+        * non-i915 fences, i.e. even though we may report the object as idle,
+        * a call to set-domain may still stall waiting for foreign rendering.
+        * This also means that wait-ioctl may report an object as busy,
+        * where busy-ioctl considers it idle.
+        *
+        * We trade the ability to warn of foreign fences to report on which
+        * i915 engines are active for the object.
+        *
+        * Alternatively, we can trade that extra information on read/write
+        * activity with
+        *      args->busy =
+        *              !reservation_object_test_signaled_rcu(obj->resv, true);
+        * to report the overall busyness. This is what the wait-ioctl does.
+        *
+        */
+retry:
+       seq = raw_read_seqcount(&obj->resv->seq);
 
-               /* Yes, the lookups are intentionally racy.
-                *
-                * First, we cannot simply rely on __I915_BO_ACTIVE. We have
-                * to regard the value as stale and as our ABI guarantees
-                * forward progress, we confirm the status of each active
-                * request with the hardware.
-                *
-                * Even though we guard the pointer lookup by RCU, that only
-                * guarantees that the pointer and its contents remain
-                * dereferencable and does *not* mean that the request we
-                * have is the same as the one being tracked by the object.
-                *
-                * Consider that we lookup the request just as it is being
-                * retired and freed. We take a local copy of the pointer,
-                * but before we add its engine into the busy set, the other
-                * thread reallocates it and assigns it to a task on another
-                * engine with a fresh and incomplete seqno. Guarding against
-                * that requires careful serialisation and reference counting,
-                * i.e. using __i915_gem_active_get_request_rcu(). We don't,
-                * instead we expect that if the result is busy, which engines
-                * are busy is not completely reliable - we only guarantee
-                * that the object was busy.
-                */
-               rcu_read_lock();
+       /* Translate the exclusive fence to the READ *and* WRITE engine */
+       args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl));
 
-               for_each_active(active, idx)
-                       args->busy |= busy_check_reader(&obj->last_read[idx]);
+       /* Translate shared fences to READ set of engines */
+       list = rcu_dereference(obj->resv->fence);
+       if (list) {
+               unsigned int shared_count = list->shared_count, i;
 
-               /* For ABI sanity, we only care that the write engine is in
-                * the set of read engines. This should be ensured by the
-                * ordering of setting last_read/last_write in
-                * i915_vma_move_to_active(), and then in reverse in retire.
-                * However, for good measure, we always report the last_write
-                * request as a busy read as well as being a busy write.
-                *
-                * We don't care that the set of active read/write engines
-                * may change during construction of the result, as it is
-                * equally liable to change before userspace can inspect
-                * the result.
-                */
-               args->busy |= busy_check_writer(&obj->last_write);
+               for (i = 0; i < shared_count; ++i) {
+                       struct dma_fence *fence =
+                               rcu_dereference(list->shared[i]);
 
-               rcu_read_unlock();
+                       args->busy |= busy_check_reader(fence);
+               }
        }
 
-       i915_gem_object_put_unlocked(obj);
-       return 0;
+       if (args->busy && read_seqcount_retry(&obj->resv->seq, seq))
+               goto retry;
+
+       err = 0;
+out:
+       rcu_read_unlock();
+       return err;
 }
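
The rewritten busy ioctl above samples the reservation object under its
seqcount: it folds the exclusive fence into the result, adds each shared
fence as a reader, and retries the whole sample if the sequence moved while
it was reading. The standalone sketch below shows only that sample-and-retry
shape; the snapshot struct, flag layout and helpers are invented for the
example and do not reflect the real uAPI encoding.

#include <stdint.h>
#include <stdio.h>

struct snapshot {
	unsigned int seq;	/* even = stable, odd = write in progress */
	uint32_t write_engine;	/* 0 means no exclusive fence */
	uint32_t read_engines;	/* bitmask of engines with shared fences */
};

static unsigned int read_begin(const struct snapshot *s)
{
	return s->seq;
}

static int read_retry(const struct snapshot *s, unsigned int seq)
{
	return (seq & 1) || s->seq != seq;
}

static uint32_t sample_busy(const struct snapshot *s)
{
	unsigned int seq;
	uint32_t busy;

	do {
		seq = read_begin(s);
		/* The writer is reported as both a reader and a writer,
		 * echoing how the exclusive fence is treated above.
		 * (The low/high split here is made up for the demo.)
		 */
		busy = s->write_engine | (s->write_engine << 16);
		busy |= s->read_engines;
	} while (read_retry(s, seq));

	return busy;
}

int main(void)
{
	struct snapshot s = {
		.seq = 2, .write_engine = 0x1, .read_engines = 0x5,
	};

	printf("busy = %#x\n", (unsigned int)sample_busy(&s));
	return 0;
}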
 
 int
@@ -4028,7 +3857,7 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct drm_i915_gem_madvise *args = data;
        struct drm_i915_gem_object *obj;
-       int ret;
+       int err;
 
        switch (args->madv) {
        case I915_MADV_DONTNEED:
@@ -4038,77 +3867,111 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
            return -EINVAL;
        }
 
-       ret = i915_mutex_lock_interruptible(dev);
-       if (ret)
-               return ret;
-
        obj = i915_gem_object_lookup(file_priv, args->handle);
-       if (!obj) {
-               ret = -ENOENT;
-               goto unlock;
-       }
+       if (!obj)
+               return -ENOENT;
+
+       err = mutex_lock_interruptible(&obj->mm.lock);
+       if (err)
+               goto out;
 
-       if (obj->pages &&
+       if (obj->mm.pages &&
            i915_gem_object_is_tiled(obj) &&
            dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
-               if (obj->madv == I915_MADV_WILLNEED)
-                       i915_gem_object_unpin_pages(obj);
-               if (args->madv == I915_MADV_WILLNEED)
-                       i915_gem_object_pin_pages(obj);
+               if (obj->mm.madv == I915_MADV_WILLNEED) {
+                       GEM_BUG_ON(!obj->mm.quirked);
+                       __i915_gem_object_unpin_pages(obj);
+                       obj->mm.quirked = false;
+               }
+               if (args->madv == I915_MADV_WILLNEED) {
+                       GEM_BUG_ON(obj->mm.quirked);
+                       __i915_gem_object_pin_pages(obj);
+                       obj->mm.quirked = true;
+               }
        }
 
-       if (obj->madv != __I915_MADV_PURGED)
-               obj->madv = args->madv;
+       if (obj->mm.madv != __I915_MADV_PURGED)
+               obj->mm.madv = args->madv;
 
        /* if the object is no longer attached, discard its backing storage */
-       if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL)
+       if (obj->mm.madv == I915_MADV_DONTNEED && !obj->mm.pages)
                i915_gem_object_truncate(obj);
 
-       args->retained = obj->madv != __I915_MADV_PURGED;
+       args->retained = obj->mm.madv != __I915_MADV_PURGED;
+       mutex_unlock(&obj->mm.lock);
 
+out:
        i915_gem_object_put(obj);
-unlock:
-       mutex_unlock(&dev->struct_mutex);
-       return ret;
+       return err;
+}
+
+static void
+frontbuffer_retire(struct i915_gem_active *active,
+                  struct drm_i915_gem_request *request)
+{
+       struct drm_i915_gem_object *obj =
+               container_of(active, typeof(*obj), frontbuffer_write);
+
+       intel_fb_obj_flush(obj, true, ORIGIN_CS);
 }
 
 void i915_gem_object_init(struct drm_i915_gem_object *obj,
                          const struct drm_i915_gem_object_ops *ops)
 {
-       int i;
+       mutex_init(&obj->mm.lock);
 
-       INIT_LIST_HEAD(&obj->global_list);
-       for (i = 0; i < I915_NUM_ENGINES; i++)
-               init_request_active(&obj->last_read[i],
-                                   i915_gem_object_retire__read);
-       init_request_active(&obj->last_write,
-                           i915_gem_object_retire__write);
+       INIT_LIST_HEAD(&obj->global_link);
+       INIT_LIST_HEAD(&obj->userfault_link);
        INIT_LIST_HEAD(&obj->obj_exec_link);
        INIT_LIST_HEAD(&obj->vma_list);
        INIT_LIST_HEAD(&obj->batch_pool_link);
 
        obj->ops = ops;
 
+       reservation_object_init(&obj->__builtin_resv);
+       obj->resv = &obj->__builtin_resv;
+
        obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
-       obj->madv = I915_MADV_WILLNEED;
+       init_request_active(&obj->frontbuffer_write, frontbuffer_retire);
+
+       obj->mm.madv = I915_MADV_WILLNEED;
+       INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
+       mutex_init(&obj->mm.get_page.lock);
 
        i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
 }
 
 static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
-       .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE,
+       .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
+                I915_GEM_OBJECT_IS_SHRINKABLE,
        .get_pages = i915_gem_object_get_pages_gtt,
        .put_pages = i915_gem_object_put_pages_gtt,
 };
 
-struct drm_i915_gem_object *i915_gem_object_create(struct drm_device *dev,
-                                                 size_t size)
+/* Note we don't consider signbits :| */
+#define overflows_type(x, T) \
+       (sizeof(x) > sizeof(T) && (x) >> (sizeof(T) * BITS_PER_BYTE))
+
+struct drm_i915_gem_object *
+i915_gem_object_create(struct drm_device *dev, u64 size)
 {
+       struct drm_i915_private *dev_priv = to_i915(dev);
        struct drm_i915_gem_object *obj;
        struct address_space *mapping;
        gfp_t mask;
        int ret;
 
+       /* There is a prevalence of the assumption that we fit the object's
+        * page count inside a 32bit _signed_ variable. Let's document this and
+        * catch if we ever need to fix it. In the meantime, if you do spot
+        * such a local variable, please consider fixing!
+        */
+       if (WARN_ON(size >> PAGE_SHIFT > INT_MAX))
+               return ERR_PTR(-E2BIG);
+
+       if (overflows_type(size, obj->base.size))
+               return ERR_PTR(-E2BIG);
+
        obj = i915_gem_object_alloc(dev);
        if (obj == NULL)
                return ERR_PTR(-ENOMEM);
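
The overflows_type() macro and the PAGE_SHIFT check above guard the new u64
size against the width of obj->base.size and against the long-standing
assumption that an object's page count fits in a signed int. The standalone
program below exercises the same macro against a 32-bit target type;
PAGE_SHIFT is assumed to be 12 and the sizes are arbitrary.

#include <limits.h>
#include <stdint.h>
#include <stdio.h>

#define BITS_PER_BYTE 8
#define PAGE_SHIFT 12	/* assumed 4 KiB pages */

/* Same macro as in the patch above. Note we don't consider signbits :| */
#define overflows_type(x, T) \
	(sizeof(x) > sizeof(T) && (x) >> (sizeof(T) * BITS_PER_BYTE))

int main(void)
{
	uint32_t dst;			/* stands in for obj->base.size */
	uint64_t ok = 64ull << 20;	/* 64 MiB */
	uint64_t huge = 1ull << 44;	/* 16 TiB */

	printf("64 MiB overflows u32: %d\n", !!overflows_type(ok, dst));
	printf("16 TiB overflows u32: %d\n", !!overflows_type(huge, dst));
	printf("16 TiB page count > INT_MAX: %d\n",
	       (huge >> PAGE_SHIFT) > (uint64_t)INT_MAX);
	return 0;
}
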
@@ -4118,7 +3981,7 @@ struct drm_i915_gem_object *i915_gem_object_create(struct drm_device *dev,
                goto fail;
 
        mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
-       if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
+       if (IS_CRESTLINE(dev_priv) || IS_BROADWATER(dev_priv)) {
                /* 965gm cannot relocate objects above 4GiB. */
                mask &= ~__GFP_HIGHMEM;
                mask |= __GFP_DMA32;
@@ -4132,7 +3995,7 @@ struct drm_i915_gem_object *i915_gem_object_create(struct drm_device *dev,
        obj->base.write_domain = I915_GEM_DOMAIN_CPU;
        obj->base.read_domains = I915_GEM_DOMAIN_CPU;
 
-       if (HAS_LLC(dev)) {
+       if (HAS_LLC(dev_priv)) {
                /* On some devices, we can have the GPU use the LLC (the CPU
                 * cache) for about a 10% performance improvement
                 * compared to uncached.  Graphics requests other than
@@ -4155,7 +4018,6 @@ struct drm_i915_gem_object *i915_gem_object_create(struct drm_device *dev,
 
 fail:
        i915_gem_object_free(obj);
-
        return ERR_PTR(ret);
 }
 
@@ -4167,7 +4029,7 @@ static bool discard_backing_storage(struct drm_i915_gem_object *obj)
         * back the contents from the GPU.
         */
 
-       if (obj->madv != I915_MADV_WILLNEED)
+       if (obj->mm.madv != I915_MADV_WILLNEED)
                return false;
 
        if (obj->base.filp == NULL)
@@ -4183,16 +4045,72 @@ static bool discard_backing_storage(struct drm_i915_gem_object *obj)
        return atomic_long_read(&obj->base.filp->f_count) == 1;
 }
 
-void i915_gem_free_object(struct drm_gem_object *gem_obj)
+static void __i915_gem_free_objects(struct drm_i915_private *i915,
+                                   struct llist_node *freed)
 {
-       struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
-       struct drm_device *dev = obj->base.dev;
-       struct drm_i915_private *dev_priv = to_i915(dev);
-       struct i915_vma *vma, *next;
+       struct drm_i915_gem_object *obj, *on;
 
-       intel_runtime_pm_get(dev_priv);
+       mutex_lock(&i915->drm.struct_mutex);
+       intel_runtime_pm_get(i915);
+       llist_for_each_entry(obj, freed, freed) {
+               struct i915_vma *vma, *vn;
+
+               trace_i915_gem_object_destroy(obj);
+
+               GEM_BUG_ON(i915_gem_object_is_active(obj));
+               list_for_each_entry_safe(vma, vn,
+                                        &obj->vma_list, obj_link) {
+                       GEM_BUG_ON(!i915_vma_is_ggtt(vma));
+                       GEM_BUG_ON(i915_vma_is_active(vma));
+                       vma->flags &= ~I915_VMA_PIN_MASK;
+                       i915_vma_close(vma);
+               }
+               GEM_BUG_ON(!list_empty(&obj->vma_list));
+               GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree));
+
+               list_del(&obj->global_link);
+       }
+       intel_runtime_pm_put(i915);
+       mutex_unlock(&i915->drm.struct_mutex);
+
+       llist_for_each_entry_safe(obj, on, freed, freed) {
+               GEM_BUG_ON(obj->bind_count);
+               GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
 
-       trace_i915_gem_object_destroy(obj);
+               if (obj->ops->release)
+                       obj->ops->release(obj);
+
+               if (WARN_ON(i915_gem_object_has_pinned_pages(obj)))
+                       atomic_set(&obj->mm.pages_pin_count, 0);
+               __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+               GEM_BUG_ON(obj->mm.pages);
+
+               if (obj->base.import_attach)
+                       drm_prime_gem_destroy(&obj->base, NULL);
+
+               reservation_object_fini(&obj->__builtin_resv);
+               drm_gem_object_release(&obj->base);
+               i915_gem_info_remove_obj(i915, obj->base.size);
+
+               kfree(obj->bit_17);
+               i915_gem_object_free(obj);
+       }
+}
+
+static void i915_gem_flush_free_objects(struct drm_i915_private *i915)
+{
+       struct llist_node *freed;
+
+       freed = llist_del_all(&i915->mm.free_list);
+       if (unlikely(freed))
+               __i915_gem_free_objects(i915, freed);
+}
+
+static void __i915_gem_free_work(struct work_struct *work)
+{
+       struct drm_i915_private *i915 =
+               container_of(work, struct drm_i915_private, mm.free_work);
+       struct llist_node *freed;
 
        /* All file-owned VMA should have been released by this point through
         * i915_gem_close_object(), or earlier by i915_gem_context_close().
@@ -4201,47 +4119,62 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
         * the GTT either for the user or for scanout). Those VMA still need to
          * be unbound now.
         */
-       list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
-               GEM_BUG_ON(!i915_vma_is_ggtt(vma));
-               GEM_BUG_ON(i915_vma_is_active(vma));
-               vma->flags &= ~I915_VMA_PIN_MASK;
-               i915_vma_close(vma);
-       }
-       GEM_BUG_ON(obj->bind_count);
 
-       /* Stolen objects don't hold a ref, but do hold pin count. Fix that up
-        * before progressing. */
-       if (obj->stolen)
-               i915_gem_object_unpin_pages(obj);
+       while ((freed = llist_del_all(&i915->mm.free_list)))
+               __i915_gem_free_objects(i915, freed);
+}
+
+static void __i915_gem_free_object_rcu(struct rcu_head *head)
+{
+       struct drm_i915_gem_object *obj =
+               container_of(head, typeof(*obj), rcu);
+       struct drm_i915_private *i915 = to_i915(obj->base.dev);
 
-       WARN_ON(atomic_read(&obj->frontbuffer_bits));
+       /* We can't simply use call_rcu() from i915_gem_free_object()
+        * as we need to block whilst unbinding, and the call_rcu
+        * task may be called from softirq context. So we take a
+        * detour through a worker.
+        */
+       if (llist_add(&obj->freed, &i915->mm.free_list))
+               schedule_work(&i915->mm.free_work);
+}
 
-       if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
-           dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
-           i915_gem_object_is_tiled(obj))
-               i915_gem_object_unpin_pages(obj);
+void i915_gem_free_object(struct drm_gem_object *gem_obj)
+{
+       struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
 
-       if (WARN_ON(obj->pages_pin_count))
-               obj->pages_pin_count = 0;
-       if (discard_backing_storage(obj))
-               obj->madv = I915_MADV_DONTNEED;
-       i915_gem_object_put_pages(obj);
+       if (obj->mm.quirked)
+               __i915_gem_object_unpin_pages(obj);
 
-       BUG_ON(obj->pages);
+       if (discard_backing_storage(obj))
+               obj->mm.madv = I915_MADV_DONTNEED;
 
-       if (obj->base.import_attach)
-               drm_prime_gem_destroy(&obj->base, NULL);
+       /* Before we free the object, make sure any pure RCU-only
+        * read-side critical sections are complete, e.g.
+        * i915_gem_busy_ioctl(). For the corresponding synchronized
+        * lookup see i915_gem_object_lookup_rcu().
+        */
+       call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
+}
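
Object teardown above is now split across three stages: i915_gem_free_object()
defers through call_rcu() so that RCU readers such as the busy ioctl finish
first, the RCU callback pushes the object onto a lock-free llist and schedules
a worker, and the worker drains the list in a context where it may sleep and
take struct_mutex. The standalone sketch below models only the handoff between
the non-blocking producer and the blocking consumer; a plain singly linked
stack stands in for the llist, an ordinary function call for the workqueue,
and it is single-threaded, so no atomics are shown.

#include <stdio.h>
#include <stdlib.h>

struct object {
	int id;
	struct object *freed;	/* link on the pending-free stack */
};

static struct object *free_list;	/* the driver uses a lock-free llist */

static void defer_free(struct object *obj)
{
	/* In the driver this runs from the RCU callback: no blocking is
	 * allowed there, so just push and leave the teardown to the worker.
	 */
	obj->freed = free_list;
	free_list = obj;
}

static void free_worker(void)
{
	struct object *obj = free_list;

	free_list = NULL;	/* like llist_del_all(): take the whole batch */
	while (obj) {
		struct object *next = obj->freed;

		/* Heavyweight teardown (unbinding, releasing pages) happens
		 * here, where sleeping and taking mutexes is allowed.
		 */
		printf("freeing object %d\n", obj->id);
		free(obj);
		obj = next;
	}
}

int main(void)
{
	for (int i = 0; i < 3; i++) {
		struct object *obj = calloc(1, sizeof(*obj));

		if (!obj)
			return 1;
		obj->id = i;
		defer_free(obj);
	}
	free_worker();
	return 0;
}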
 
-       if (obj->ops->release)
-               obj->ops->release(obj);
+void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
+{
+       lockdep_assert_held(&obj->base.dev->struct_mutex);
 
-       drm_gem_object_release(&obj->base);
-       i915_gem_info_remove_obj(dev_priv, obj->base.size);
+       GEM_BUG_ON(i915_gem_object_has_active_reference(obj));
+       if (i915_gem_object_is_active(obj))
+               i915_gem_object_set_active_reference(obj);
+       else
+               i915_gem_object_put(obj);
+}
 
-       kfree(obj->bit_17);
-       i915_gem_object_free(obj);
+static void assert_kernel_context_is_current(struct drm_i915_private *dev_priv)
+{
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
 
-       intel_runtime_pm_put(dev_priv);
+       for_each_engine(engine, dev_priv, id)
+               GEM_BUG_ON(engine->last_context != dev_priv->kernel_context);
 }
 
 int i915_gem_suspend(struct drm_device *dev)
@@ -4272,18 +4205,46 @@ int i915_gem_suspend(struct drm_device *dev)
                goto err;
 
        i915_gem_retire_requests(dev_priv);
+       GEM_BUG_ON(dev_priv->gt.active_requests);
 
+       assert_kernel_context_is_current(dev_priv);
        i915_gem_context_lost(dev_priv);
        mutex_unlock(&dev->struct_mutex);
 
        cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
        cancel_delayed_work_sync(&dev_priv->gt.retire_work);
        flush_delayed_work(&dev_priv->gt.idle_work);
+       flush_work(&dev_priv->mm.free_work);
 
        /* Assert that we successfully flushed all the work and
         * reset the GPU back to its idle, low power state.
         */
        WARN_ON(dev_priv->gt.awake);
+       WARN_ON(!intel_execlists_idle(dev_priv));
+
+       /*
+        * Neither the BIOS, ourselves nor any other kernel
+        * expects the system to be in execlists mode on startup,
+        * so we need to reset the GPU back to legacy mode. And the only
+        * known way to disable logical contexts is through a GPU reset.
+        *
+        * So in order to leave the system in a known default configuration,
+        * always reset the GPU upon unload and suspend. Afterwards we then
+        * clean up the GEM state tracking, flushing off the requests and
+        * leaving the system in a known idle state.
+        *
+        * Note that it is of the utmost importance that the GPU is idle and
+        * all stray writes are flushed *before* we dismantle the backing
+        * storage for the pinned objects.
+        *
+        * However, since we are uncertain that resetting the GPU on older
+        * machines is a good idea, we don't - just in case it leaves the
+        * machine in an unusable condition.
+        */
+       if (HAS_HW_CONTEXTS(dev_priv)) {
+               int reset = intel_gpu_reset(dev_priv, ALL_ENGINES);
+               WARN_ON(reset && reset != -ENODEV);
+       }
 
        return 0;
 
@@ -4296,8 +4257,10 @@ void i915_gem_resume(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = to_i915(dev);
 
+       WARN_ON(dev_priv->gt.awake);
+
        mutex_lock(&dev->struct_mutex);
-       i915_gem_restore_gtt_mappings(dev);
+       i915_gem_restore_gtt_mappings(dev_priv);
 
        /* As we didn't flush the kernel context before suspend, we cannot
         * guarantee that the context image is complete. So let's just reset
@@ -4308,55 +4271,51 @@ void i915_gem_resume(struct drm_device *dev)
        mutex_unlock(&dev->struct_mutex);
 }
 
-void i915_gem_init_swizzling(struct drm_device *dev)
+void i915_gem_init_swizzling(struct drm_i915_private *dev_priv)
 {
-       struct drm_i915_private *dev_priv = to_i915(dev);
-
-       if (INTEL_INFO(dev)->gen < 5 ||
+       if (INTEL_GEN(dev_priv) < 5 ||
            dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
                return;
 
        I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
                                 DISP_TILE_SURFACE_SWIZZLING);
 
-       if (IS_GEN5(dev))
+       if (IS_GEN5(dev_priv))
                return;
 
        I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
-       if (IS_GEN6(dev))
+       if (IS_GEN6(dev_priv))
                I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
-       else if (IS_GEN7(dev))
+       else if (IS_GEN7(dev_priv))
                I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
-       else if (IS_GEN8(dev))
+       else if (IS_GEN8(dev_priv))
                I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
        else
                BUG();
 }
 
-static void init_unused_ring(struct drm_device *dev, u32 base)
+static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base)
 {
-       struct drm_i915_private *dev_priv = to_i915(dev);
-
        I915_WRITE(RING_CTL(base), 0);
        I915_WRITE(RING_HEAD(base), 0);
        I915_WRITE(RING_TAIL(base), 0);
        I915_WRITE(RING_START(base), 0);
 }
 
-static void init_unused_rings(struct drm_device *dev)
-{
-       if (IS_I830(dev)) {
-               init_unused_ring(dev, PRB1_BASE);
-               init_unused_ring(dev, SRB0_BASE);
-               init_unused_ring(dev, SRB1_BASE);
-               init_unused_ring(dev, SRB2_BASE);
-               init_unused_ring(dev, SRB3_BASE);
-       } else if (IS_GEN2(dev)) {
-               init_unused_ring(dev, SRB0_BASE);
-               init_unused_ring(dev, SRB1_BASE);
-       } else if (IS_GEN3(dev)) {
-               init_unused_ring(dev, PRB1_BASE);
-               init_unused_ring(dev, PRB2_BASE);
+static void init_unused_rings(struct drm_i915_private *dev_priv)
+{
+       if (IS_I830(dev_priv)) {
+               init_unused_ring(dev_priv, PRB1_BASE);
+               init_unused_ring(dev_priv, SRB0_BASE);
+               init_unused_ring(dev_priv, SRB1_BASE);
+               init_unused_ring(dev_priv, SRB2_BASE);
+               init_unused_ring(dev_priv, SRB3_BASE);
+       } else if (IS_GEN2(dev_priv)) {
+               init_unused_ring(dev_priv, SRB0_BASE);
+               init_unused_ring(dev_priv, SRB1_BASE);
+       } else if (IS_GEN3(dev_priv)) {
+               init_unused_ring(dev_priv, PRB1_BASE);
+               init_unused_ring(dev_priv, PRB2_BASE);
        }
 }
 
@@ -4365,31 +4324,34 @@ i915_gem_init_hw(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct intel_engine_cs *engine;
+       enum intel_engine_id id;
        int ret;
 
+       dev_priv->gt.last_init_time = ktime_get();
+
        /* Double layer security blanket, see i915_gem_init() */
        intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
 
-       if (HAS_EDRAM(dev) && INTEL_GEN(dev_priv) < 9)
+       if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9)
                I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
 
-       if (IS_HASWELL(dev))
-               I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ?
+       if (IS_HASWELL(dev_priv))
+               I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ?
                           LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
 
-       if (HAS_PCH_NOP(dev)) {
-               if (IS_IVYBRIDGE(dev)) {
+       if (HAS_PCH_NOP(dev_priv)) {
+               if (IS_IVYBRIDGE(dev_priv)) {
                        u32 temp = I915_READ(GEN7_MSG_CTL);
                        temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
                        I915_WRITE(GEN7_MSG_CTL, temp);
-               } else if (INTEL_INFO(dev)->gen >= 7) {
+               } else if (INTEL_GEN(dev_priv) >= 7) {
                        u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
                        temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
                        I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
                }
        }
 
-       i915_gem_init_swizzling(dev);
+       i915_gem_init_swizzling(dev_priv);
 
        /*
         * At least 830 can leave some of the unused rings
@@ -4397,18 +4359,18 @@ i915_gem_init_hw(struct drm_device *dev)
         * will prevent c3 entry. Makes sure all unused rings
         * are totally idle.
         */
-       init_unused_rings(dev);
+       init_unused_rings(dev_priv);
 
        BUG_ON(!dev_priv->kernel_context);
 
-       ret = i915_ppgtt_init_hw(dev);
+       ret = i915_ppgtt_init_hw(dev_priv);
        if (ret) {
                DRM_ERROR("PPGTT enable HW failed %d\n", ret);
                goto out;
        }
 
        /* Need to do basic initialisation of all rings first: */
-       for_each_engine(engine, dev_priv) {
+       for_each_engine(engine, dev_priv, id) {
                ret = engine->init_hw(engine);
                if (ret)
                        goto out;
@@ -4507,21 +4469,15 @@ i915_gem_cleanup_engines(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct intel_engine_cs *engine;
+       enum intel_engine_id id;
 
-       for_each_engine(engine, dev_priv)
+       for_each_engine(engine, dev_priv, id)
                dev_priv->gt.cleanup_engine(engine);
 }
 
-static void
-init_engine_lists(struct intel_engine_cs *engine)
-{
-       INIT_LIST_HEAD(&engine->request_list);
-}
-
 void
 i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
 {
-       struct drm_device *dev = &dev_priv->drm;
        int i;
 
        if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) &&
@@ -4545,41 +4501,52 @@ i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
                fence->id = i;
                list_add_tail(&fence->link, &dev_priv->mm.fence_list);
        }
-       i915_gem_restore_fences(dev);
+       i915_gem_restore_fences(dev_priv);
 
-       i915_gem_detect_bit_6_swizzle(dev);
+       i915_gem_detect_bit_6_swizzle(dev_priv);
 }
 
-void
+int
 i915_gem_load_init(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = to_i915(dev);
-       int i;
+       int err = -ENOMEM;
+
+       dev_priv->objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN);
+       if (!dev_priv->objects)
+               goto err_out;
+
+       dev_priv->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN);
+       if (!dev_priv->vmas)
+               goto err_objects;
 
-       dev_priv->objects =
-               kmem_cache_create("i915_gem_object",
-                                 sizeof(struct drm_i915_gem_object), 0,
-                                 SLAB_HWCACHE_ALIGN,
-                                 NULL);
-       dev_priv->vmas =
-               kmem_cache_create("i915_gem_vma",
-                                 sizeof(struct i915_vma), 0,
-                                 SLAB_HWCACHE_ALIGN,
-                                 NULL);
-       dev_priv->requests =
-               kmem_cache_create("i915_gem_request",
-                                 sizeof(struct drm_i915_gem_request), 0,
-                                 SLAB_HWCACHE_ALIGN |
-                                 SLAB_RECLAIM_ACCOUNT |
-                                 SLAB_DESTROY_BY_RCU,
-                                 NULL);
+       dev_priv->requests = KMEM_CACHE(drm_i915_gem_request,
+                                       SLAB_HWCACHE_ALIGN |
+                                       SLAB_RECLAIM_ACCOUNT |
+                                       SLAB_DESTROY_BY_RCU);
+       if (!dev_priv->requests)
+               goto err_vmas;
+
+       dev_priv->dependencies = KMEM_CACHE(i915_dependency,
+                                           SLAB_HWCACHE_ALIGN |
+                                           SLAB_RECLAIM_ACCOUNT);
+       if (!dev_priv->dependencies)
+               goto err_requests;
+
+       mutex_lock(&dev_priv->drm.struct_mutex);
+       INIT_LIST_HEAD(&dev_priv->gt.timelines);
+       err = i915_gem_timeline_init__global(dev_priv);
+       mutex_unlock(&dev_priv->drm.struct_mutex);
+       if (err)
+               goto err_dependencies;
 
        INIT_LIST_HEAD(&dev_priv->context_list);
+       INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work);
+       init_llist_head(&dev_priv->mm.free_list);
        INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
        INIT_LIST_HEAD(&dev_priv->mm.bound_list);
        INIT_LIST_HEAD(&dev_priv->mm.fence_list);
-       for (i = 0; i < I915_NUM_ENGINES; i++)
-               init_engine_lists(&dev_priv->engine[i]);
+       INIT_LIST_HEAD(&dev_priv->mm.userfault_list);
        INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
                          i915_gem_retire_work_handler);
        INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
@@ -4596,12 +4563,33 @@ i915_gem_load_init(struct drm_device *dev)
        atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
 
        spin_lock_init(&dev_priv->fb_tracking.lock);
+
+       return 0;
+
+err_dependencies:
+       kmem_cache_destroy(dev_priv->dependencies);
+err_requests:
+       kmem_cache_destroy(dev_priv->requests);
+err_vmas:
+       kmem_cache_destroy(dev_priv->vmas);
+err_objects:
+       kmem_cache_destroy(dev_priv->objects);
+err_out:
+       return err;
 }
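
i915_gem_load_init() above now reports allocation failure and unwinds with a
reverse-order goto ladder, destroying only the caches that were successfully
created. The standalone sketch below shows that pattern in isolation;
malloc()/free() stand in for KMEM_CACHE()/kmem_cache_destroy(), the struct is
invented for the example, and teardown on the success path is omitted.

#include <stdlib.h>

struct caches {
	void *objects;
	void *vmas;
	void *requests;
	void *dependencies;
};

static int caches_init(struct caches *c)
{
	int err = -1;	/* -ENOMEM in the driver */

	c->objects = malloc(64);
	if (!c->objects)
		goto err_out;

	c->vmas = malloc(64);
	if (!c->vmas)
		goto err_objects;

	c->requests = malloc(64);
	if (!c->requests)
		goto err_vmas;

	c->dependencies = malloc(64);
	if (!c->dependencies)
		goto err_requests;

	return 0;

err_requests:
	free(c->requests);
err_vmas:
	free(c->vmas);
err_objects:
	free(c->objects);
err_out:
	return err;
}

int main(void)
{
	struct caches c;

	return caches_init(&c) ? 1 : 0;
}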
 
 void i915_gem_load_cleanup(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = to_i915(dev);
 
+       WARN_ON(!llist_empty(&dev_priv->mm.free_list));
+
+       mutex_lock(&dev_priv->drm.struct_mutex);
+       i915_gem_timeline_fini(&dev_priv->gt.global_timeline);
+       WARN_ON(!list_empty(&dev_priv->gt.timelines));
+       mutex_unlock(&dev_priv->drm.struct_mutex);
+
+       kmem_cache_destroy(dev_priv->dependencies);
        kmem_cache_destroy(dev_priv->requests);
        kmem_cache_destroy(dev_priv->vmas);
        kmem_cache_destroy(dev_priv->objects);
@@ -4650,7 +4638,7 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
        i915_gem_shrink(dev_priv, -1UL, I915_SHRINK_UNBOUND);
 
        for (p = phases; *p; p++) {
-               list_for_each_entry(obj, *p, global_list) {
+               list_for_each_entry(obj, *p, global_link) {
                        obj->base.read_domains = I915_GEM_DOMAIN_CPU;
                        obj->base.write_domain = I915_GEM_DOMAIN_CPU;
                }
@@ -4686,7 +4674,7 @@ int i915_gem_open(struct drm_device *dev, struct drm_file *file)
        struct drm_i915_file_private *file_priv;
        int ret;
 
-       DRM_DEBUG_DRIVER("\n");
+       DRM_DEBUG("\n");
 
        file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
        if (!file_priv)
@@ -4742,21 +4730,6 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
        }
 }
 
-/* Like i915_gem_object_get_page(), but mark the returned page dirty */
-struct page *
-i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n)
-{
-       struct page *page;
-
-       /* Only default objects have per-page dirty tracking */
-       if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
-               return NULL;
-
-       page = i915_gem_object_get_page(obj, n);
-       set_page_dirty(page);
-       return page;
-}
-
 /* Allocate a new GEM object and fill it with the supplied data */
 struct drm_i915_gem_object *
 i915_gem_object_create_from_data(struct drm_device *dev,
@@ -4775,14 +4748,13 @@ i915_gem_object_create_from_data(struct drm_device *dev,
        if (ret)
                goto fail;
 
-       ret = i915_gem_object_get_pages(obj);
+       ret = i915_gem_object_pin_pages(obj);
        if (ret)
                goto fail;
 
-       i915_gem_object_pin_pages(obj);
-       sg = obj->pages;
+       sg = obj->mm.pages;
        bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size);
-       obj->dirty = 1;         /* Backing store is now out of date */
+       obj->mm.dirty = true; /* Backing store is now out of date */
        i915_gem_object_unpin_pages(obj);
 
        if (WARN_ON(bytes != size)) {
@@ -4797,3 +4769,156 @@ i915_gem_object_create_from_data(struct drm_device *dev,
        i915_gem_object_put(obj);
        return ERR_PTR(ret);
 }
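
The conversion just above folds the old get_pages + pin_pages pair into a single i915_gem_object_pin_pages() call and moves both the sg_table and the dirty flag under obj->mm. A hedged sketch of the resulting caller-side pattern follows; write_into_object() is a hypothetical helper rather than upstream code, and the -EFAULT error code is illustrative.

static int write_into_object(struct drm_i915_gem_object *obj,
                             const void *data, size_t size)
{
        struct sg_table *sg;
        size_t bytes;
        int ret;

        /* A single call now acquires and pins the backing pages. */
        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                return ret;

        sg = obj->mm.pages;             /* sg_table now lives under obj->mm */
        bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size);
        obj->mm.dirty = true;           /* backing store is now out of date */

        i915_gem_object_unpin_pages(obj);

        return bytes == size ? 0 : -EFAULT;
}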
+
+struct scatterlist *
+i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
+                      unsigned int n,
+                      unsigned int *offset)
+{
+       struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
+       struct scatterlist *sg;
+       unsigned int idx, count;
+
+       might_sleep();
+       GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
+       GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+
+       /* As we iterate forward through the sg, we record each entry in a
+        * radixtree for quick repeated (backwards) lookups. If we have seen
+        * this index previously, we will have an entry for it.
+        *
+        * Initial lookup is O(N), but this is amortized to O(1) for
+        * sequential page access (where each new request is consecutive
+        * to the previous one). Repeated lookups of an already-seen index
+        * are O(lg(obj->base.size)) in the radixtree, which in practice
+        * behaves like O(1), just with a larger constant.
+        */
+       if (n < READ_ONCE(iter->sg_idx))
+               goto lookup;
+
+       mutex_lock(&iter->lock);
+
+       /* We prefer to reuse the last sg so that repeated lookups of this
+        * (or the subsequent) sg are fast - comparing against the last
+        * sg is faster than going through the radixtree.
+        */
+
+       sg = iter->sg_pos;
+       idx = iter->sg_idx;
+       count = __sg_page_count(sg);
+
+       while (idx + count <= n) {
+               unsigned long exception, i;
+               int ret;
+
+               /* If we cannot allocate and insert this entry, or the
+                * individual pages from this range, into the radixtree,
+                * stop updating sg_idx: this lookup then falls back to a
+                * linear scan onwards, while future lookups will retry
+                * the insertion (and so must treat an -EEXIST return as
+                * "this index is already present" rather than as an
+                * error).
+                */
+               ret = radix_tree_insert(&iter->radix, idx, sg);
+               if (ret && ret != -EEXIST)
+                       goto scan;
+
+               exception =
+                       RADIX_TREE_EXCEPTIONAL_ENTRY |
+                       idx << RADIX_TREE_EXCEPTIONAL_SHIFT;
+               for (i = 1; i < count; i++) {
+                       ret = radix_tree_insert(&iter->radix, idx + i,
+                                               (void *)exception);
+                       if (ret && ret != -EEXIST)
+                               goto scan;
+               }
+
+               idx += count;
+               sg = ____sg_next(sg);
+               count = __sg_page_count(sg);
+       }
+
+scan:
+       iter->sg_pos = sg;
+       iter->sg_idx = idx;
+
+       mutex_unlock(&iter->lock);
+
+       if (unlikely(n < idx)) /* insertion completed by another thread */
+               goto lookup;
+
+       /* In case we failed to insert the entry into the radixtree, we need
+        * to look beyond the current sg.
+        */
+       while (idx + count <= n) {
+               idx += count;
+               sg = ____sg_next(sg);
+               count = __sg_page_count(sg);
+       }
+
+       *offset = n - idx;
+       return sg;
+
+lookup:
+       rcu_read_lock();
+
+       sg = radix_tree_lookup(&iter->radix, n);
+       GEM_BUG_ON(!sg);
+
+       /* If this index is in the middle of a multi-page sg entry,
+        * the radixtree will contain an exceptional entry that points
+        * to the start of that range. We will return the pointer to
+        * the base page and the offset of this page within the
+        * sg entry's range.
+        */
+       *offset = 0;
+       if (unlikely(radix_tree_exception(sg))) {
+               unsigned long base =
+                       (unsigned long)sg >> RADIX_TREE_EXCEPTIONAL_SHIFT;
+
+               sg = radix_tree_lookup(&iter->radix, base);
+               GEM_BUG_ON(!sg);
+
+               *offset = n - base;
+       }
+
+       rcu_read_unlock();
+
+       return sg;
+}
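
Because i915_gem_object_get_sg() caches its position in iter->sg_pos/sg_idx, a caller that walks page indices in ascending order keeps hitting the amortized O(1) fast path and only needs the radixtree for random or backwards access. A hedged usage sketch under that assumption; flush_object_pages() is a hypothetical caller, not upstream code, and it presumes the pages are already pinned, as the GEM_BUG_ON above requires.

#include <drm/drm_cache.h>

static void flush_object_pages(struct drm_i915_gem_object *obj)
{
        unsigned int n, npages = obj->base.size >> PAGE_SHIFT;

        /* Sequential indices reuse the cached sg position, so each
         * iteration is amortized O(1) rather than an O(N) walk from
         * the start of the sg_table.
         */
        for (n = 0; n < npages; n++) {
                struct page *page = i915_gem_object_get_page(obj, n);

                drm_clflush_pages(&page, 1);
        }
}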
+
+struct page *
+i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
+{
+       struct scatterlist *sg;
+       unsigned int offset;
+
+       GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
+
+       sg = i915_gem_object_get_sg(obj, n, &offset);
+       return nth_page(sg_page(sg), offset);
+}
+
+/* Like i915_gem_object_get_page(), but mark the returned page dirty */
+struct page *
+i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
+                              unsigned int n)
+{
+       struct page *page;
+
+       page = i915_gem_object_get_page(obj, n);
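+       /* If the object is already flagged dirty, every backing page is
+        * marked dirty when the pages are released, so dirtying this
+        * page individually would be redundant.
+        */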
+       if (!obj->mm.dirty)
+               set_page_dirty(page);
+
+       return page;
+}
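
A hedged sketch of how such a dirty-page lookup is typically consumed, i.e. mapping the page and writing through the CPU; poke_dword() is a hypothetical helper rather than upstream code, and it assumes the caller already holds a pin on the object's pages.

#include <linux/highmem.h>

static void poke_dword(struct drm_i915_gem_object *obj,
                       unsigned long offset, u32 value)
{
        struct page *page;
        void *vaddr;

        /* Mark the page dirty up front, since we are about to write to
         * it through the CPU.
         */
        page = i915_gem_object_get_dirty_page(obj, offset >> PAGE_SHIFT);

        vaddr = kmap_atomic(page);
        *(u32 *)(vaddr + offset_in_page(offset)) = value;
        kunmap_atomic(vaddr);
}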
+
+dma_addr_t
+i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
+                               unsigned long n)
+{
+       struct scatterlist *sg;
+       unsigned int offset;
+
+       sg = i915_gem_object_get_sg(obj, n, &offset);
+       return sg_dma_address(sg) + (offset << PAGE_SHIFT);
+}