drm/i915: Move GEM domain management to its own file
author    Chris Wilson <chris@chris-wilson.co.uk>
          Tue, 28 May 2019 09:29:48 +0000 (10:29 +0100)
committer Chris Wilson <chris@chris-wilson.co.uk>
          Tue, 28 May 2019 11:45:29 +0000 (12:45 +0100)
Continuing the decluttering of i915_gem.c, this time moving out the read/write
domain management, perhaps the biggest of GEM's follies?

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190528092956.14910-7-chris@chris-wilson.co.uk
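
The read/write-domain tracking moved here is what backs the set-domain ioctl,
so nothing changes for userspace. As a reminder of the interface this code
services, a minimal libdrm-based sketch (the helper name set_gtt_domain and
the surrounding error handling are illustrative, not part of this patch):

#include <stdint.h>
#include <xf86drm.h>
#include <drm/i915_drm.h>

/* Ask the kernel to move a GEM object into the GTT read/write domain,
 * flushing any stale CPU or GPU writes first. The request is serviced
 * by i915_gem_set_domain_ioctl(), which now lives in i915_gem_domain.c.
 */
static int set_gtt_domain(int fd, uint32_t handle)
{
	struct drm_i915_gem_set_domain arg = {
		.handle = handle,
		.read_domains = I915_GEM_DOMAIN_GTT,
		.write_domain = I915_GEM_DOMAIN_GTT,
	};

	return drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg);
}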
13 files changed:
drivers/gpu/drm/i915/Makefile
drivers/gpu/drm/i915/gem/i915_gem_domain.c [new file with mode: 0644]
drivers/gpu/drm/i915/gem/i915_gem_object.h
drivers/gpu/drm/i915/gvt/cmd_parser.c
drivers/gpu/drm/i915/gvt/scheduler.c
drivers/gpu/drm/i915/i915_cmd_parser.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem_execbuffer.c
drivers/gpu/drm/i915/i915_gem_render_state.c
drivers/gpu/drm/i915/selftests/huge_pages.c
drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
drivers/gpu/drm/i915/selftests/i915_gem_context.c

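Besides the move, this patch renames the shmem access helpers:
i915_gem_obj_prepare_shmem_read/write become i915_gem_object_prepare_read/write,
and i915_gem_obj_finish_shmem_access becomes i915_gem_object_finish_access, as
the call sites below show. A rough caller sketch, modelled on the
i915_gem_shmem_pread() hunk in this patch and assuming the usual i915_drv.h /
gem/i915_gem_object.h context (read_first_page and its arguments are made up
for illustration, and error paths are trimmed):

/* Pin the object's pages, learn whether a manual clflush is needed,
 * copy out of the first backing page, then drop the pin again.
 */
static int read_first_page(struct drm_i915_gem_object *obj,
			   void *dst, unsigned int len)
{
	unsigned int needs_clflush;
	void *vaddr;
	int ret;

	ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
	if (ret)
		return ret;

	/* Waits for the GPU, pins the pages and reports CLFLUSH_* needs. */
	ret = i915_gem_object_prepare_read(obj, &needs_clflush);
	mutex_unlock(&obj->base.dev->struct_mutex);
	if (ret)
		return ret;

	vaddr = kmap(i915_gem_object_get_page(obj, 0));
	if (needs_clflush & CLFLUSH_BEFORE)
		drm_clflush_virt_range(vaddr, len);
	memcpy(dst, vaddr, len);
	kunmap(i915_gem_object_get_page(obj, 0));

	/* Unpins the pages taken by i915_gem_object_prepare_read(). */
	i915_gem_object_finish_access(obj);
	return 0;
}
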
index 4c14628dc943a7650038e267cc5b15a8babe85fd..5ffd7e9b19adc6144121c8ca3a2f2d798acb748b 100644 (file)
@@ -87,6 +87,7 @@ i915-y += $(gt-y)
 # GEM (Graphics Execution Management) code
 obj-y += gem/
 gem-y += \
+       gem/i915_gem_domain.o \
        gem/i915_gem_object.o \
        gem/i915_gem_mman.o \
        gem/i915_gem_pages.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
new file mode 100644 (file)
index 0000000..bbc7fb7
--- /dev/null
@@ -0,0 +1,782 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "i915_gem_clflush.h"
+#include "i915_gem_gtt.h"
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+#include "i915_vma.h"
+#include "intel_frontbuffer.h"
+
+static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
+{
+       /*
+        * We manually flush the CPU domain so that we can override and
+        * force the flush for the display, and perform it asynchronously.
+        */
+       i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+       if (obj->cache_dirty)
+               i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
+       obj->write_domain = 0;
+}
+
+void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
+{
+       if (!READ_ONCE(obj->pin_global))
+               return;
+
+       mutex_lock(&obj->base.dev->struct_mutex);
+       __i915_gem_object_flush_for_display(obj);
+       mutex_unlock(&obj->base.dev->struct_mutex);
+}
+
+/**
+ * Moves a single object to the WC read, and possibly write domain.
+ * @obj: object to act on
+ * @write: ask for write access or read only
+ *
+ * This function returns when the move is complete, including waiting on
+ * flushes to occur.
+ */
+int
+i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
+{
+       int ret;
+
+       lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+       ret = i915_gem_object_wait(obj,
+                                  I915_WAIT_INTERRUPTIBLE |
+                                  I915_WAIT_LOCKED |
+                                  (write ? I915_WAIT_ALL : 0),
+                                  MAX_SCHEDULE_TIMEOUT);
+       if (ret)
+               return ret;
+
+       if (obj->write_domain == I915_GEM_DOMAIN_WC)
+               return 0;
+
+       /* Flush and acquire obj->pages so that we are coherent through
+        * direct access in memory with previous cached writes through
+        * shmemfs and that our cache domain tracking remains valid.
+        * For example, if the obj->filp was moved to swap without us
+        * being notified and releasing the pages, we would mistakenly
+        * continue to assume that the obj remained out of the CPU cached
+        * domain.
+        */
+       ret = i915_gem_object_pin_pages(obj);
+       if (ret)
+               return ret;
+
+       i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
+
+       /* Serialise direct access to this object with the barriers for
+        * coherent writes from the GPU, by effectively invalidating the
+        * WC domain upon first access.
+        */
+       if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
+               mb();
+
+       /* It should now be out of any other write domains, and we can update
+        * the domain values for our changes.
+        */
+       GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
+       obj->read_domains |= I915_GEM_DOMAIN_WC;
+       if (write) {
+               obj->read_domains = I915_GEM_DOMAIN_WC;
+               obj->write_domain = I915_GEM_DOMAIN_WC;
+               obj->mm.dirty = true;
+       }
+
+       i915_gem_object_unpin_pages(obj);
+       return 0;
+}
+
+/**
+ * Moves a single object to the GTT read, and possibly write domain.
+ * @obj: object to act on
+ * @write: ask for write access or read only
+ *
+ * This function returns when the move is complete, including waiting on
+ * flushes to occur.
+ */
+int
+i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
+{
+       int ret;
+
+       lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+       ret = i915_gem_object_wait(obj,
+                                  I915_WAIT_INTERRUPTIBLE |
+                                  I915_WAIT_LOCKED |
+                                  (write ? I915_WAIT_ALL : 0),
+                                  MAX_SCHEDULE_TIMEOUT);
+       if (ret)
+               return ret;
+
+       if (obj->write_domain == I915_GEM_DOMAIN_GTT)
+               return 0;
+
+       /* Flush and acquire obj->pages so that we are coherent through
+        * direct access in memory with previous cached writes through
+        * shmemfs and that our cache domain tracking remains valid.
+        * For example, if the obj->filp was moved to swap without us
+        * being notified and releasing the pages, we would mistakenly
+        * continue to assume that the obj remained out of the CPU cached
+        * domain.
+        */
+       ret = i915_gem_object_pin_pages(obj);
+       if (ret)
+               return ret;
+
+       i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
+
+       /* Serialise direct access to this object with the barriers for
+        * coherent writes from the GPU, by effectively invalidating the
+        * GTT domain upon first access.
+        */
+       if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
+               mb();
+
+       /* It should now be out of any other write domains, and we can update
+        * the domain values for our changes.
+        */
+       GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
+       obj->read_domains |= I915_GEM_DOMAIN_GTT;
+       if (write) {
+               obj->read_domains = I915_GEM_DOMAIN_GTT;
+               obj->write_domain = I915_GEM_DOMAIN_GTT;
+               obj->mm.dirty = true;
+       }
+
+       i915_gem_object_unpin_pages(obj);
+       return 0;
+}
+
+/**
+ * Changes the cache-level of an object across all VMA.
+ * @obj: object to act on
+ * @cache_level: new cache level to set for the object
+ *
+ * After this function returns, the object will be in the new cache-level
+ * across all GTT and the contents of the backing storage will be coherent,
+ * with respect to the new cache-level. In order to keep the backing storage
+ * coherent for all users, we only allow a single cache level to be set
+ * globally on the object and prevent it from being changed whilst the
+ * hardware is reading from the object. That is, if the object is currently
+ * on the scanout, it will be set to uncached (or equivalent display
+ * cache coherency) and all non-MOCS GPU access will also be uncached so
+ * that all direct access to the scanout remains coherent.
+ */
+int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
+                                   enum i915_cache_level cache_level)
+{
+       struct i915_vma *vma;
+       int ret;
+
+       lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+       if (obj->cache_level == cache_level)
+               return 0;
+
+       /* Inspect the list of currently bound VMA and unbind any that would
+        * be invalid given the new cache-level. This is principally to
+        * catch the issue of the CS prefetch crossing page boundaries and
+        * reading an invalid PTE on older architectures.
+        */
+restart:
+       list_for_each_entry(vma, &obj->vma.list, obj_link) {
+               if (!drm_mm_node_allocated(&vma->node))
+                       continue;
+
+               if (i915_vma_is_pinned(vma)) {
+                       DRM_DEBUG("can not change the cache level of pinned objects\n");
+                       return -EBUSY;
+               }
+
+               if (!i915_vma_is_closed(vma) &&
+                   i915_gem_valid_gtt_space(vma, cache_level))
+                       continue;
+
+               ret = i915_vma_unbind(vma);
+               if (ret)
+                       return ret;
+
+               /* As unbinding may affect other elements in the
+                * obj->vma_list (due to side-effects from retiring
+                * an active vma), play safe and restart the iterator.
+                */
+               goto restart;
+       }
+
+       /* We can reuse the existing drm_mm nodes but need to change the
+        * cache-level on the PTE. We could simply unbind them all and
+        * rebind with the correct cache-level on next use. However since
+        * rebind with the correct cache-level on next use. However, since
+        * we already have a valid slot, dma mapping, pages etc, we may as well
+        * state and so involves less work.
+        */
+       if (obj->bind_count) {
+               /* Before we change the PTE, the GPU must not be accessing it.
+                * If we wait upon the object, we know that all the bound
+                * VMA are no longer active.
+                */
+               ret = i915_gem_object_wait(obj,
+                                          I915_WAIT_INTERRUPTIBLE |
+                                          I915_WAIT_LOCKED |
+                                          I915_WAIT_ALL,
+                                          MAX_SCHEDULE_TIMEOUT);
+               if (ret)
+                       return ret;
+
+               if (!HAS_LLC(to_i915(obj->base.dev)) &&
+                   cache_level != I915_CACHE_NONE) {
+                       /* Access to snoopable pages through the GTT is
+                        * incoherent and on some machines causes a hard
+                        * lockup. Relinquish the CPU mmapping to force
+                        * userspace to refault in the pages and we can
+                        * then double check if the GTT mapping is still
+                        * valid for that pointer access.
+                        */
+                       i915_gem_object_release_mmap(obj);
+
+                       /* As we no longer need a fence for GTT access,
+                        * we can relinquish it now (and so prevent having
+                        * to steal a fence from someone else on the next
+                        * fence request). Note GPU activity would have
+                        * dropped the fence as all snoopable access is
+                        * supposed to be linear.
+                        */
+                       for_each_ggtt_vma(vma, obj) {
+                               ret = i915_vma_put_fence(vma);
+                               if (ret)
+                                       return ret;
+                       }
+               } else {
+                       /* We either have incoherent backing store and
+                        * so no GTT access or the architecture is fully
+                        * coherent. In such cases, existing GTT mmaps
+                        * ignore the cache bit in the PTE and we can
+                        * rewrite it without confusing the GPU or having
+                        * to force userspace to fault back in its mmaps.
+                        */
+               }
+
+               list_for_each_entry(vma, &obj->vma.list, obj_link) {
+                       if (!drm_mm_node_allocated(&vma->node))
+                               continue;
+
+                       ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
+                       if (ret)
+                               return ret;
+               }
+       }
+
+       list_for_each_entry(vma, &obj->vma.list, obj_link)
+               vma->node.color = cache_level;
+       i915_gem_object_set_cache_coherency(obj, cache_level);
+       obj->cache_dirty = true; /* Always invalidate stale cachelines */
+
+       return 0;
+}
+
+int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
+                              struct drm_file *file)
+{
+       struct drm_i915_gem_caching *args = data;
+       struct drm_i915_gem_object *obj;
+       int err = 0;
+
+       rcu_read_lock();
+       obj = i915_gem_object_lookup_rcu(file, args->handle);
+       if (!obj) {
+               err = -ENOENT;
+               goto out;
+       }
+
+       switch (obj->cache_level) {
+       case I915_CACHE_LLC:
+       case I915_CACHE_L3_LLC:
+               args->caching = I915_CACHING_CACHED;
+               break;
+
+       case I915_CACHE_WT:
+               args->caching = I915_CACHING_DISPLAY;
+               break;
+
+       default:
+               args->caching = I915_CACHING_NONE;
+               break;
+       }
+out:
+       rcu_read_unlock();
+       return err;
+}
+
+int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
+                              struct drm_file *file)
+{
+       struct drm_i915_private *i915 = to_i915(dev);
+       struct drm_i915_gem_caching *args = data;
+       struct drm_i915_gem_object *obj;
+       enum i915_cache_level level;
+       int ret = 0;
+
+       switch (args->caching) {
+       case I915_CACHING_NONE:
+               level = I915_CACHE_NONE;
+               break;
+       case I915_CACHING_CACHED:
+               /*
+                * Due to a HW issue on BXT A stepping, GPU stores via a
+                * snooped mapping may leave stale data in a corresponding CPU
+                * cacheline, whereas normally such cachelines would get
+                * invalidated.
+                */
+               if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
+                       return -ENODEV;
+
+               level = I915_CACHE_LLC;
+               break;
+       case I915_CACHING_DISPLAY:
+               level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       obj = i915_gem_object_lookup(file, args->handle);
+       if (!obj)
+               return -ENOENT;
+
+       /*
+        * The caching mode of a proxy object is handled by its generator, and
+        * is not allowed to be changed by userspace.
+        */
+       if (i915_gem_object_is_proxy(obj)) {
+               ret = -ENXIO;
+               goto out;
+       }
+
+       if (obj->cache_level == level)
+               goto out;
+
+       ret = i915_gem_object_wait(obj,
+                                  I915_WAIT_INTERRUPTIBLE,
+                                  MAX_SCHEDULE_TIMEOUT);
+       if (ret)
+               goto out;
+
+       ret = i915_mutex_lock_interruptible(dev);
+       if (ret)
+               goto out;
+
+       ret = i915_gem_object_set_cache_level(obj, level);
+       mutex_unlock(&dev->struct_mutex);
+
+out:
+       i915_gem_object_put(obj);
+       return ret;
+}
+
+/*
+ * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
+ * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
+ * (for pageflips). We only flush the caches while preparing the buffer for
+ * display, the callers are responsible for frontbuffer flush.
+ */
+struct i915_vma *
+i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
+                                    u32 alignment,
+                                    const struct i915_ggtt_view *view,
+                                    unsigned int flags)
+{
+       struct i915_vma *vma;
+       int ret;
+
+       lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+       /* Mark the global pin early so that we account for the
+        * display coherency whilst setting up the cache domains.
+        */
+       obj->pin_global++;
+
+       /* The display engine is not coherent with the LLC cache on gen6.  As
+        * a result, we make sure that the pinning that is about to occur is
+        * done with uncached PTEs. This is the lowest common denominator for all
+        * chipsets.
+        *
+        * However for gen6+, we could do better by using the GFDT bit instead
+        * of uncaching, which would allow us to flush all the LLC-cached data
+        * with that bit in the PTE to main memory with just one PIPE_CONTROL.
+        */
+       ret = i915_gem_object_set_cache_level(obj,
+                                             HAS_WT(to_i915(obj->base.dev)) ?
+                                             I915_CACHE_WT : I915_CACHE_NONE);
+       if (ret) {
+               vma = ERR_PTR(ret);
+               goto err_unpin_global;
+       }
+
+       /* As the user may map the buffer once pinned in the display plane
+        * (e.g. libkms for the bootup splash), we have to ensure that we
+        * always use map_and_fenceable for all scanout buffers. However,
+        * it may simply be too big to fit into the mappable aperture, in which
+        * case pin it anyway and hope that userspace can cope (but always first
+        * try to preserve the existing ABI).
+        */
+       vma = ERR_PTR(-ENOSPC);
+       if ((flags & PIN_MAPPABLE) == 0 &&
+           (!view || view->type == I915_GGTT_VIEW_NORMAL))
+               vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
+                                              flags |
+                                              PIN_MAPPABLE |
+                                              PIN_NONBLOCK);
+       if (IS_ERR(vma))
+               vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
+       if (IS_ERR(vma))
+               goto err_unpin_global;
+
+       vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
+
+       __i915_gem_object_flush_for_display(obj);
+
+       /* It should now be out of any other write domains, and we can update
+        * the domain values for our changes.
+        */
+       obj->read_domains |= I915_GEM_DOMAIN_GTT;
+
+       return vma;
+
+err_unpin_global:
+       obj->pin_global--;
+       return vma;
+}
+
+static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
+{
+       struct drm_i915_private *i915 = to_i915(obj->base.dev);
+       struct list_head *list;
+       struct i915_vma *vma;
+
+       GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+
+       mutex_lock(&i915->ggtt.vm.mutex);
+       for_each_ggtt_vma(vma, obj) {
+               if (!drm_mm_node_allocated(&vma->node))
+                       continue;
+
+               list_move_tail(&vma->vm_link, &vma->vm->bound_list);
+       }
+       mutex_unlock(&i915->ggtt.vm.mutex);
+
+       spin_lock(&i915->mm.obj_lock);
+       list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
+       list_move_tail(&obj->mm.link, list);
+       spin_unlock(&i915->mm.obj_lock);
+}
+
+void
+i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
+{
+       lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+
+       if (WARN_ON(vma->obj->pin_global == 0))
+               return;
+
+       if (--vma->obj->pin_global == 0)
+               vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
+
+       /* Bump the LRU to try and avoid premature eviction whilst flipping  */
+       i915_gem_object_bump_inactive_ggtt(vma->obj);
+
+       i915_vma_unpin(vma);
+}
+
+/**
+ * Moves a single object to the CPU read, and possibly write domain.
+ * @obj: object to act on
+ * @write: requesting write or read-only access
+ *
+ * This function returns when the move is complete, including waiting on
+ * flushes to occur.
+ */
+int
+i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
+{
+       int ret;
+
+       lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+       ret = i915_gem_object_wait(obj,
+                                  I915_WAIT_INTERRUPTIBLE |
+                                  I915_WAIT_LOCKED |
+                                  (write ? I915_WAIT_ALL : 0),
+                                  MAX_SCHEDULE_TIMEOUT);
+       if (ret)
+               return ret;
+
+       i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+
+       /* Flush the CPU cache if it's still invalid. */
+       if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
+               i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
+               obj->read_domains |= I915_GEM_DOMAIN_CPU;
+       }
+
+       /* It should now be out of any other write domains, and we can update
+        * the domain values for our changes.
+        */
+       GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);
+
+       /* If we're writing through the CPU, then the GPU read domains will
+        * need to be invalidated at next use.
+        */
+       if (write)
+               __start_cpu_write(obj);
+
+       return 0;
+}
+
+static inline enum fb_op_origin
+fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
+{
+       return (domain == I915_GEM_DOMAIN_GTT ?
+               obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
+}
+
+/**
+ * Called when user space prepares to use an object with the CPU, either
+ * through the mmap ioctl's mapping or a GTT mapping.
+ * @dev: drm device
+ * @data: ioctl data blob
+ * @file: drm file
+ */
+int
+i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
+                         struct drm_file *file)
+{
+       struct drm_i915_gem_set_domain *args = data;
+       struct drm_i915_gem_object *obj;
+       u32 read_domains = args->read_domains;
+       u32 write_domain = args->write_domain;
+       int err;
+
+       /* Only handle setting domains to types used by the CPU. */
+       if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
+               return -EINVAL;
+
+       /*
+        * Having something in the write domain implies it's in the read
+        * domain, and only that read domain.  Enforce that in the request.
+        */
+       if (write_domain && read_domains != write_domain)
+               return -EINVAL;
+
+       if (!read_domains)
+               return 0;
+
+       obj = i915_gem_object_lookup(file, args->handle);
+       if (!obj)
+               return -ENOENT;
+
+       /*
+        * Already in the desired write domain? Nothing for us to do!
+        *
+        * We apply a little bit of cunning here to catch a broader set of
+        * no-ops. If obj->write_domain is set, we must be in the same
+        * obj->read_domains, and only that domain. Therefore, if that
+        * obj->write_domain matches the request read_domains, we are
+        * already in the same read/write domain and can skip the operation,
+        * without having to further check the requested write_domain.
+        */
+       if (READ_ONCE(obj->write_domain) == read_domains) {
+               err = 0;
+               goto out;
+       }
+
+       /*
+        * Try to flush the object off the GPU without holding the lock.
+        * We will repeat the flush holding the lock in the normal manner
+        * to catch cases where we are gazumped.
+        */
+       err = i915_gem_object_wait(obj,
+                                  I915_WAIT_INTERRUPTIBLE |
+                                  I915_WAIT_PRIORITY |
+                                  (write_domain ? I915_WAIT_ALL : 0),
+                                  MAX_SCHEDULE_TIMEOUT);
+       if (err)
+               goto out;
+
+       /*
+        * Proxy objects do not control access to the backing storage, ergo
+        * they cannot be used as a means to manipulate the cache domain
+        * tracking for that backing storage. The proxy object is always
+        * considered to be outside of any cache domain.
+        */
+       if (i915_gem_object_is_proxy(obj)) {
+               err = -ENXIO;
+               goto out;
+       }
+
+       /*
+        * Flush and acquire obj->pages so that we are coherent through
+        * direct access in memory with previous cached writes through
+        * shmemfs and that our cache domain tracking remains valid.
+        * For example, if the obj->filp was moved to swap without us
+        * being notified and releasing the pages, we would mistakenly
+        * continue to assume that the obj remained out of the CPU cached
+        * domain.
+        */
+       err = i915_gem_object_pin_pages(obj);
+       if (err)
+               goto out;
+
+       err = i915_mutex_lock_interruptible(dev);
+       if (err)
+               goto out_unpin;
+
+       if (read_domains & I915_GEM_DOMAIN_WC)
+               err = i915_gem_object_set_to_wc_domain(obj, write_domain);
+       else if (read_domains & I915_GEM_DOMAIN_GTT)
+               err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
+       else
+               err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
+
+       /* And bump the LRU for this access */
+       i915_gem_object_bump_inactive_ggtt(obj);
+
+       mutex_unlock(&dev->struct_mutex);
+
+       if (write_domain != 0)
+               intel_fb_obj_invalidate(obj,
+                                       fb_write_origin(obj, write_domain));
+
+out_unpin:
+       i915_gem_object_unpin_pages(obj);
+out:
+       i915_gem_object_put(obj);
+       return err;
+}
+
+/*
+ * Pins the specified object's pages and synchronizes the object with
+ * GPU accesses. Sets needs_clflush to non-zero if the caller should
+ * flush the object from the CPU cache.
+ */
+int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
+                                unsigned int *needs_clflush)
+{
+       int ret;
+
+       lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+       *needs_clflush = 0;
+       if (!i915_gem_object_has_struct_page(obj))
+               return -ENODEV;
+
+       ret = i915_gem_object_wait(obj,
+                                  I915_WAIT_INTERRUPTIBLE |
+                                  I915_WAIT_LOCKED,
+                                  MAX_SCHEDULE_TIMEOUT);
+       if (ret)
+               return ret;
+
+       ret = i915_gem_object_pin_pages(obj);
+       if (ret)
+               return ret;
+
+       if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
+           !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+               ret = i915_gem_object_set_to_cpu_domain(obj, false);
+               if (ret)
+                       goto err_unpin;
+               else
+                       goto out;
+       }
+
+       i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+
+       /* If we're not in the cpu read domain, set ourself into the gtt
+        * read domain and manually flush cachelines (if required). This
+        * optimizes for the case when the gpu will dirty the data
+        * anyway again before the next pread happens.
+        */
+       if (!obj->cache_dirty &&
+           !(obj->read_domains & I915_GEM_DOMAIN_CPU))
+               *needs_clflush = CLFLUSH_BEFORE;
+
+out:
+       /* return with the pages pinned */
+       return 0;
+
+err_unpin:
+       i915_gem_object_unpin_pages(obj);
+       return ret;
+}
+
+int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
+                                 unsigned int *needs_clflush)
+{
+       int ret;
+
+       lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+       *needs_clflush = 0;
+       if (!i915_gem_object_has_struct_page(obj))
+               return -ENODEV;
+
+       ret = i915_gem_object_wait(obj,
+                                  I915_WAIT_INTERRUPTIBLE |
+                                  I915_WAIT_LOCKED |
+                                  I915_WAIT_ALL,
+                                  MAX_SCHEDULE_TIMEOUT);
+       if (ret)
+               return ret;
+
+       ret = i915_gem_object_pin_pages(obj);
+       if (ret)
+               return ret;
+
+       if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
+           !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+               ret = i915_gem_object_set_to_cpu_domain(obj, true);
+               if (ret)
+                       goto err_unpin;
+               else
+                       goto out;
+       }
+
+       i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+
+       /* If we're not in the cpu write domain, set ourself into the
+        * gtt write domain and manually flush cachelines (as required).
+        * This optimizes for the case when the gpu will use the data
+        * right away and we therefore have to clflush anyway.
+        */
+       if (!obj->cache_dirty) {
+               *needs_clflush |= CLFLUSH_AFTER;
+
+               /*
+                * Same trick applies to invalidate partially written
+                * cachelines read before writing.
+                */
+               if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
+                       *needs_clflush |= CLFLUSH_BEFORE;
+       }
+
+out:
+       intel_fb_obj_invalidate(obj, ORIGIN_CPU);
+       obj->mm.dirty = true;
+       /* return with the pages pinned */
+       return 0;
+
+err_unpin:
+       i915_gem_object_unpin_pages(obj);
+       return ret;
+}
index 07f487cbff79a377c38459065e146f2eb0582bd2..8cf082abb0ab947de8144bac5c25ceab50f8b391 100644 (file)
@@ -15,6 +15,8 @@
 
 #include "i915_gem_object_types.h"
 
+#include "i915_gem_gtt.h"
+
 void i915_gem_init__objects(struct drm_i915_private *i915);
 
 struct drm_i915_gem_object *i915_gem_object_alloc(void);
@@ -358,6 +360,20 @@ void
 i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
                                   unsigned int flush_domains);
 
+int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
+                                unsigned int *needs_clflush);
+int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
+                                 unsigned int *needs_clflush);
+#define CLFLUSH_BEFORE BIT(0)
+#define CLFLUSH_AFTER  BIT(1)
+#define CLFLUSH_FLAGS  (CLFLUSH_BEFORE | CLFLUSH_AFTER)
+
+static inline void
+i915_gem_object_finish_access(struct drm_i915_gem_object *obj)
+{
+       i915_gem_object_unpin_pages(obj);
+}
+
 static inline struct intel_engine_cs *
 i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
 {
@@ -379,6 +395,19 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
                                         unsigned int cache_level);
 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
 
+int __must_check
+i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
+int __must_check
+i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write);
+int __must_check
+i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
+struct i915_vma * __must_check
+i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
+                                    u32 alignment,
+                                    const struct i915_ggtt_view *view,
+                                    unsigned int flags);
+void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma);
+
 static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
 {
        if (obj->cache_dirty)
index 7584bf0aeaa436ea73c729244fb08c0894d5ab2d..e3608b170105eb6dab7e317b9f64ed7663a9f569 100644 (file)
@@ -1764,7 +1764,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
                goto err_free_bb;
        }
 
-       ret = i915_gem_obj_prepare_shmem_write(bb->obj, &bb->clflush);
+       ret = i915_gem_object_prepare_write(bb->obj, &bb->clflush);
        if (ret)
                goto err_free_obj;
 
@@ -1813,7 +1813,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
 err_unmap:
        i915_gem_object_unpin_map(bb->obj);
 err_finish_shmem_access:
-       i915_gem_obj_finish_shmem_access(bb->obj);
+       i915_gem_object_finish_access(bb->obj);
 err_free_obj:
        i915_gem_object_put(bb->obj);
 err_free_bb:
index 38897d241f5f161aa22af929a17f25ab8373d0b1..3a691447f76cd18d9ecec62e854ee5f7424f304d 100644 (file)
@@ -482,7 +482,7 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
                                                bb->obj->base.size);
                                bb->clflush &= ~CLFLUSH_AFTER;
                        }
-                       i915_gem_obj_finish_shmem_access(bb->obj);
+                       i915_gem_object_finish_access(bb->obj);
                        bb->accessing = false;
 
                } else {
@@ -510,7 +510,7 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
                        if (ret)
                                goto err;
 
-                       i915_gem_obj_finish_shmem_access(bb->obj);
+                       i915_gem_object_finish_access(bb->obj);
                        bb->accessing = false;
 
                        ret = i915_vma_move_to_active(bb->vma,
@@ -588,7 +588,7 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
        list_for_each_entry_safe(bb, pos, &workload->shadow_bb, list) {
                if (bb->obj) {
                        if (bb->accessing)
-                               i915_gem_obj_finish_shmem_access(bb->obj);
+                               i915_gem_object_finish_access(bb->obj);
 
                        if (bb->va && !IS_ERR(bb->va))
                                i915_gem_object_unpin_map(bb->obj);
index e9fadcb4d59248ae34e0bbb7c3d6c4a47e92ee1a..c893bd4eb2c8e1cf566c6c93836b5e9441cac89d 100644 (file)
@@ -1058,11 +1058,11 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
        void *dst, *src;
        int ret;
 
-       ret = i915_gem_obj_prepare_shmem_read(src_obj, &src_needs_clflush);
+       ret = i915_gem_object_prepare_read(src_obj, &src_needs_clflush);
        if (ret)
                return ERR_PTR(ret);
 
-       ret = i915_gem_obj_prepare_shmem_write(dst_obj, &dst_needs_clflush);
+       ret = i915_gem_object_prepare_write(dst_obj, &dst_needs_clflush);
        if (ret) {
                dst = ERR_PTR(ret);
                goto unpin_src;
@@ -1120,9 +1120,9 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
        *needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER;
 
 unpin_dst:
-       i915_gem_obj_finish_shmem_access(dst_obj);
+       i915_gem_object_finish_access(dst_obj);
 unpin_src:
-       i915_gem_obj_finish_shmem_access(src_obj);
+       i915_gem_object_finish_access(src_obj);
        return dst;
 }
 
index 584ebb901e183be4e7f91862d076387a255fcaf2..596af542afeaa99381ee13f6222d197d6eafaea2 100644 (file)
@@ -2814,20 +2814,6 @@ static inline int __sg_page_count(const struct scatterlist *sg)
        return sg->length >> PAGE_SHIFT;
 }
 
-int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
-                                   unsigned int *needs_clflush);
-int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
-                                    unsigned int *needs_clflush);
-#define CLFLUSH_BEFORE BIT(0)
-#define CLFLUSH_AFTER  BIT(1)
-#define CLFLUSH_FLAGS  (CLFLUSH_BEFORE | CLFLUSH_AFTER)
-
-static inline void
-i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
-{
-       i915_gem_object_unpin_pages(obj);
-}
-
 static inline int __must_check
 i915_mutex_lock_interruptible(struct drm_device *dev)
 {
@@ -2890,18 +2876,6 @@ int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
                                  const struct i915_sched_attr *attr);
 #define I915_PRIORITY_DISPLAY I915_USER_PRIORITY(I915_PRIORITY_MAX)
 
-int __must_check
-i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
-int __must_check
-i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write);
-int __must_check
-i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
-struct i915_vma * __must_check
-i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
-                                    u32 alignment,
-                                    const struct i915_ggtt_view *view,
-                                    unsigned int flags);
-void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma);
 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file);
 void i915_gem_release(struct drm_device *dev, struct drm_file *file);
 
index bde25d5326ba509f7c14bb5a03e735da96e77e60..0570907cc9d20d609305680ba6bbb4f477315d11 100644 (file)
@@ -462,123 +462,6 @@ void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
        }
 }
 
-/*
- * Pins the specified object's pages and synchronizes the object with
- * GPU accesses. Sets needs_clflush to non-zero if the caller should
- * flush the object from the CPU cache.
- */
-int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
-                                   unsigned int *needs_clflush)
-{
-       int ret;
-
-       lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-       *needs_clflush = 0;
-       if (!i915_gem_object_has_struct_page(obj))
-               return -ENODEV;
-
-       ret = i915_gem_object_wait(obj,
-                                  I915_WAIT_INTERRUPTIBLE |
-                                  I915_WAIT_LOCKED,
-                                  MAX_SCHEDULE_TIMEOUT);
-       if (ret)
-               return ret;
-
-       ret = i915_gem_object_pin_pages(obj);
-       if (ret)
-               return ret;
-
-       if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
-           !static_cpu_has(X86_FEATURE_CLFLUSH)) {
-               ret = i915_gem_object_set_to_cpu_domain(obj, false);
-               if (ret)
-                       goto err_unpin;
-               else
-                       goto out;
-       }
-
-       i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
-
-       /* If we're not in the cpu read domain, set ourself into the gtt
-        * read domain and manually flush cachelines (if required). This
-        * optimizes for the case when the gpu will dirty the data
-        * anyway again before the next pread happens.
-        */
-       if (!obj->cache_dirty &&
-           !(obj->read_domains & I915_GEM_DOMAIN_CPU))
-               *needs_clflush = CLFLUSH_BEFORE;
-
-out:
-       /* return with the pages pinned */
-       return 0;
-
-err_unpin:
-       i915_gem_object_unpin_pages(obj);
-       return ret;
-}
-
-int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
-                                    unsigned int *needs_clflush)
-{
-       int ret;
-
-       lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-       *needs_clflush = 0;
-       if (!i915_gem_object_has_struct_page(obj))
-               return -ENODEV;
-
-       ret = i915_gem_object_wait(obj,
-                                  I915_WAIT_INTERRUPTIBLE |
-                                  I915_WAIT_LOCKED |
-                                  I915_WAIT_ALL,
-                                  MAX_SCHEDULE_TIMEOUT);
-       if (ret)
-               return ret;
-
-       ret = i915_gem_object_pin_pages(obj);
-       if (ret)
-               return ret;
-
-       if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
-           !static_cpu_has(X86_FEATURE_CLFLUSH)) {
-               ret = i915_gem_object_set_to_cpu_domain(obj, true);
-               if (ret)
-                       goto err_unpin;
-               else
-                       goto out;
-       }
-
-       i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
-
-       /* If we're not in the cpu write domain, set ourself into the
-        * gtt write domain and manually flush cachelines (as required).
-        * This optimizes for the case when the gpu will use the data
-        * right away and we therefore have to clflush anyway.
-        */
-       if (!obj->cache_dirty) {
-               *needs_clflush |= CLFLUSH_AFTER;
-
-               /*
-                * Same trick applies to invalidate partially written
-                * cachelines read before writing.
-                */
-               if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
-                       *needs_clflush |= CLFLUSH_BEFORE;
-       }
-
-out:
-       intel_fb_obj_invalidate(obj, ORIGIN_CPU);
-       obj->mm.dirty = true;
-       /* return with the pages pinned */
-       return 0;
-
-err_unpin:
-       i915_gem_object_unpin_pages(obj);
-       return ret;
-}
-
 static int
 shmem_pread(struct page *page, int offset, int len, char __user *user_data,
            bool needs_clflush)
@@ -612,7 +495,7 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
        if (ret)
                return ret;
 
-       ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
+       ret = i915_gem_object_prepare_read(obj, &needs_clflush);
        mutex_unlock(&obj->base.dev->struct_mutex);
        if (ret)
                return ret;
@@ -634,7 +517,7 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
                offset = 0;
        }
 
-       i915_gem_obj_finish_shmem_access(obj);
+       i915_gem_object_finish_access(obj);
        return ret;
 }
 
@@ -1009,7 +892,7 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
        if (ret)
                return ret;
 
-       ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
+       ret = i915_gem_object_prepare_write(obj, &needs_clflush);
        mutex_unlock(&i915->drm.struct_mutex);
        if (ret)
                return ret;
@@ -1041,7 +924,7 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
        }
 
        intel_fb_obj_flush(obj, ORIGIN_CPU);
-       i915_gem_obj_finish_shmem_access(obj);
+       i915_gem_object_finish_access(obj);
        return ret;
 }
 
@@ -1130,150 +1013,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
        return ret;
 }
 
-static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
-{
-       struct drm_i915_private *i915 = to_i915(obj->base.dev);
-       struct list_head *list;
-       struct i915_vma *vma;
-
-       GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
-
-       mutex_lock(&i915->ggtt.vm.mutex);
-       for_each_ggtt_vma(vma, obj) {
-               if (!drm_mm_node_allocated(&vma->node))
-                       continue;
-
-               list_move_tail(&vma->vm_link, &vma->vm->bound_list);
-       }
-       mutex_unlock(&i915->ggtt.vm.mutex);
-
-       spin_lock(&i915->mm.obj_lock);
-       list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
-       list_move_tail(&obj->mm.link, list);
-       spin_unlock(&i915->mm.obj_lock);
-}
-
-static inline enum fb_op_origin
-fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
-{
-       return (domain == I915_GEM_DOMAIN_GTT ?
-               obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
-}
-
-/**
- * Called when user space prepares to use an object with the CPU, either
- * through the mmap ioctl's mapping or a GTT mapping.
- * @dev: drm device
- * @data: ioctl data blob
- * @file: drm file
- */
-int
-i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
-                         struct drm_file *file)
-{
-       struct drm_i915_gem_set_domain *args = data;
-       struct drm_i915_gem_object *obj;
-       u32 read_domains = args->read_domains;
-       u32 write_domain = args->write_domain;
-       int err;
-
-       /* Only handle setting domains to types used by the CPU. */
-       if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
-               return -EINVAL;
-
-       /*
-        * Having something in the write domain implies it's in the read
-        * domain, and only that read domain.  Enforce that in the request.
-        */
-       if (write_domain && read_domains != write_domain)
-               return -EINVAL;
-
-       if (!read_domains)
-               return 0;
-
-       obj = i915_gem_object_lookup(file, args->handle);
-       if (!obj)
-               return -ENOENT;
-
-       /*
-        * Already in the desired write domain? Nothing for us to do!
-        *
-        * We apply a little bit of cunning here to catch a broader set of
-        * no-ops. If obj->write_domain is set, we must be in the same
-        * obj->read_domains, and only that domain. Therefore, if that
-        * obj->write_domain matches the request read_domains, we are
-        * already in the same read/write domain and can skip the operation,
-        * without having to further check the requested write_domain.
-        */
-       if (READ_ONCE(obj->write_domain) == read_domains) {
-               err = 0;
-               goto out;
-       }
-
-       /*
-        * Try to flush the object off the GPU without holding the lock.
-        * We will repeat the flush holding the lock in the normal manner
-        * to catch cases where we are gazumped.
-        */
-       err = i915_gem_object_wait(obj,
-                                  I915_WAIT_INTERRUPTIBLE |
-                                  I915_WAIT_PRIORITY |
-                                  (write_domain ? I915_WAIT_ALL : 0),
-                                  MAX_SCHEDULE_TIMEOUT);
-       if (err)
-               goto out;
-
-       /*
-        * Proxy objects do not control access to the backing storage, ergo
-        * they cannot be used as a means to manipulate the cache domain
-        * tracking for that backing storage. The proxy object is always
-        * considered to be outside of any cache domain.
-        */
-       if (i915_gem_object_is_proxy(obj)) {
-               err = -ENXIO;
-               goto out;
-       }
-
-       /*
-        * Flush and acquire obj->pages so that we are coherent through
-        * direct access in memory with previous cached writes through
-        * shmemfs and that our cache domain tracking remains valid.
-        * For example, if the obj->filp was moved to swap without us
-        * being notified and releasing the pages, we would mistakenly
-        * continue to assume that the obj remained out of the CPU cached
-        * domain.
-        */
-       err = i915_gem_object_pin_pages(obj);
-       if (err)
-               goto out;
-
-       err = i915_mutex_lock_interruptible(dev);
-       if (err)
-               goto out_unpin;
-
-       if (read_domains & I915_GEM_DOMAIN_WC)
-               err = i915_gem_object_set_to_wc_domain(obj, write_domain);
-       else if (read_domains & I915_GEM_DOMAIN_GTT)
-               err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
-       else
-               err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
-
-       /* And bump the LRU for this access */
-       i915_gem_object_bump_inactive_ggtt(obj);
-
-       mutex_unlock(&dev->struct_mutex);
-
-       if (write_domain != 0)
-               intel_fb_obj_invalidate(obj,
-                                       fb_write_origin(obj, write_domain));
-
-out_unpin:
-       i915_gem_object_unpin_pages(obj);
-out:
-       i915_gem_object_put(obj);
-       return err;
-}
-
 /**
  * Called when user space has done writes to this buffer
  * @dev: drm device
@@ -1542,514 +1281,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,
        return 0;
 }
 
-static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
-{
-       /*
-        * We manually flush the CPU domain so that we can override and
-        * force the flush for the display, and perform it asyncrhonously.
-        */
-       i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
-       if (obj->cache_dirty)
-               i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
-       obj->write_domain = 0;
-}
-
-void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
-{
-       if (!READ_ONCE(obj->pin_global))
-               return;
-
-       mutex_lock(&obj->base.dev->struct_mutex);
-       __i915_gem_object_flush_for_display(obj);
-       mutex_unlock(&obj->base.dev->struct_mutex);
-}
-
-/**
- * Moves a single object to the WC read, and possibly write domain.
- * @obj: object to act on
- * @write: ask for write access or read only
- *
- * This function returns when the move is complete, including waiting on
- * flushes to occur.
- */
-int
-i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
-{
-       int ret;
-
-       lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-       ret = i915_gem_object_wait(obj,
-                                  I915_WAIT_INTERRUPTIBLE |
-                                  I915_WAIT_LOCKED |
-                                  (write ? I915_WAIT_ALL : 0),
-                                  MAX_SCHEDULE_TIMEOUT);
-       if (ret)
-               return ret;
-
-       if (obj->write_domain == I915_GEM_DOMAIN_WC)
-               return 0;
-
-       /* Flush and acquire obj->pages so that we are coherent through
-        * direct access in memory with previous cached writes through
-        * shmemfs and that our cache domain tracking remains valid.
-        * For example, if the obj->filp was moved to swap without us
-        * being notified and releasing the pages, we would mistakenly
-        * continue to assume that the obj remained out of the CPU cached
-        * domain.
-        */
-       ret = i915_gem_object_pin_pages(obj);
-       if (ret)
-               return ret;
-
-       i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
-
-       /* Serialise direct access to this object with the barriers for
-        * coherent writes from the GPU, by effectively invalidating the
-        * WC domain upon first access.
-        */
-       if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
-               mb();
-
-       /* It should now be out of any other write domains, and we can update
-        * the domain values for our changes.
-        */
-       GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
-       obj->read_domains |= I915_GEM_DOMAIN_WC;
-       if (write) {
-               obj->read_domains = I915_GEM_DOMAIN_WC;
-               obj->write_domain = I915_GEM_DOMAIN_WC;
-               obj->mm.dirty = true;
-       }
-
-       i915_gem_object_unpin_pages(obj);
-       return 0;
-}
-
-/**
- * Moves a single object to the GTT read, and possibly write domain.
- * @obj: object to act on
- * @write: ask for write access or read only
- *
- * This function returns when the move is complete, including waiting on
- * flushes to occur.
- */
-int
-i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
-{
-       int ret;
-
-       lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-       ret = i915_gem_object_wait(obj,
-                                  I915_WAIT_INTERRUPTIBLE |
-                                  I915_WAIT_LOCKED |
-                                  (write ? I915_WAIT_ALL : 0),
-                                  MAX_SCHEDULE_TIMEOUT);
-       if (ret)
-               return ret;
-
-       if (obj->write_domain == I915_GEM_DOMAIN_GTT)
-               return 0;
-
-       /* Flush and acquire obj->pages so that we are coherent through
-        * direct access in memory with previous cached writes through
-        * shmemfs and that our cache domain tracking remains valid.
-        * For example, if the obj->filp was moved to swap without us
-        * being notified and releasing the pages, we would mistakenly
-        * continue to assume that the obj remained out of the CPU cached
-        * domain.
-        */
-       ret = i915_gem_object_pin_pages(obj);
-       if (ret)
-               return ret;
-
-       i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
-
-       /* Serialise direct access to this object with the barriers for
-        * coherent writes from the GPU, by effectively invalidating the
-        * GTT domain upon first access.
-        */
-       if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
-               mb();
-
-       /* It should now be out of any other write domains, and we can update
-        * the domain values for our changes.
-        */
-       GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
-       obj->read_domains |= I915_GEM_DOMAIN_GTT;
-       if (write) {
-               obj->read_domains = I915_GEM_DOMAIN_GTT;
-               obj->write_domain = I915_GEM_DOMAIN_GTT;
-               obj->mm.dirty = true;
-       }
-
-       i915_gem_object_unpin_pages(obj);
-       return 0;
-}
-
-/**
- * Changes the cache-level of an object across all VMA.
- * @obj: object to act on
- * @cache_level: new cache level to set for the object
- *
- * After this function returns, the object will be in the new cache-level
- * across all GTT and the contents of the backing storage will be coherent,
- * with respect to the new cache-level. In order to keep the backing storage
- * coherent for all users, we only allow a single cache level to be set
- * globally on the object and prevent it from being changed whilst the
- * hardware is reading from the object. That is if the object is currently
- * on the scanout it will be set to uncached (or equivalent display
- * cache coherency) and all non-MOCS GPU access will also be uncached so
- * that all direct access to the scanout remains coherent.
- */
-int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
-                                   enum i915_cache_level cache_level)
-{
-       struct i915_vma *vma;
-       int ret;
-
-       lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-       if (obj->cache_level == cache_level)
-               return 0;
-
-       /* Inspect the list of currently bound VMA and unbind any that would
-        * be invalid given the new cache-level. This is principally to
-        * catch the issue of the CS prefetch crossing page boundaries and
-        * reading an invalid PTE on older architectures.
-        */
-restart:
-       list_for_each_entry(vma, &obj->vma.list, obj_link) {
-               if (!drm_mm_node_allocated(&vma->node))
-                       continue;
-
-               if (i915_vma_is_pinned(vma)) {
-                       DRM_DEBUG("can not change the cache level of pinned objects\n");
-                       return -EBUSY;
-               }
-
-               if (!i915_vma_is_closed(vma) &&
-                   i915_gem_valid_gtt_space(vma, cache_level))
-                       continue;
-
-               ret = i915_vma_unbind(vma);
-               if (ret)
-                       return ret;
-
-               /* As unbinding may affect other elements in the
-                * obj->vma_list (due to side-effects from retiring
-                * an active vma), play safe and restart the iterator.
-                */
-               goto restart;
-       }
-
-       /* We can reuse the existing drm_mm nodes but need to change the
-        * cache-level on the PTE. We could simply unbind them all and
-        * rebind with the correct cache-level on next use. However since
-        * we already have a valid slot, dma mapping, pages etc, we may as
-        * rewrite the PTE in the belief that doing so tramples upon less
-        * state and so involves less work.
-        */
-       if (obj->bind_count) {
-               /* Before we change the PTE, the GPU must not be accessing it.
-                * If we wait upon the object, we know that all the bound
-                * VMA are no longer active.
-                */
-               ret = i915_gem_object_wait(obj,
-                                          I915_WAIT_INTERRUPTIBLE |
-                                          I915_WAIT_LOCKED |
-                                          I915_WAIT_ALL,
-                                          MAX_SCHEDULE_TIMEOUT);
-               if (ret)
-                       return ret;
-
-               if (!HAS_LLC(to_i915(obj->base.dev)) &&
-                   cache_level != I915_CACHE_NONE) {
-                       /* Access to snoopable pages through the GTT is
-                        * incoherent and on some machines causes a hard
-                        * lockup. Relinquish the CPU mmapping to force
-                        * userspace to refault in the pages and we can
-                        * then double check if the GTT mapping is still
-                        * valid for that pointer access.
-                        */
-                       i915_gem_object_release_mmap(obj);
-
-                       /* As we no longer need a fence for GTT access,
-                        * we can relinquish it now (and so prevent having
-                        * to steal a fence from someone else on the next
-                        * fence request). Note GPU activity would have
-                        * dropped the fence as all snoopable access is
-                        * supposed to be linear.
-                        */
-                       for_each_ggtt_vma(vma, obj) {
-                               ret = i915_vma_put_fence(vma);
-                               if (ret)
-                                       return ret;
-                       }
-               } else {
-                       /* We either have incoherent backing store and
-                        * so no GTT access or the architecture is fully
-                        * coherent. In such cases, existing GTT mmaps
-                        * ignore the cache bit in the PTE and we can
-                        * rewrite it without confusing the GPU or having
-                        * to force userspace to fault back in its mmaps.
-                        */
-               }
-
-               list_for_each_entry(vma, &obj->vma.list, obj_link) {
-                       if (!drm_mm_node_allocated(&vma->node))
-                               continue;
-
-                       ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
-                       if (ret)
-                               return ret;
-               }
-       }
-
-       list_for_each_entry(vma, &obj->vma.list, obj_link)
-               vma->node.color = cache_level;
-       i915_gem_object_set_cache_coherency(obj, cache_level);
-       obj->cache_dirty = true; /* Always invalidate stale cachelines */
-
-       return 0;
-}
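
The calling convention for i915_gem_object_set_cache_level() is the one both the set-caching ioctl below and the display-pinning path follow: take struct_mutex, change the level, drop the lock. A minimal sketch of a hypothetical in-kernel caller (the helper name and the choice of I915_CACHE_NONE are illustrative only, not part of this patch):

/* Hypothetical caller; assumes the usual i915_drv.h context. */
static int example_make_uncached(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	int err;

	err = i915_mutex_lock_interruptible(dev);
	if (err)
		return err;

	/* May unbind incompatible VMA and rewrite PTEs, as described above. */
	err = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);

	mutex_unlock(&dev->struct_mutex);
	return err;
}
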
-
-int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
-                              struct drm_file *file)
-{
-       struct drm_i915_gem_caching *args = data;
-       struct drm_i915_gem_object *obj;
-       int err = 0;
-
-       rcu_read_lock();
-       obj = i915_gem_object_lookup_rcu(file, args->handle);
-       if (!obj) {
-               err = -ENOENT;
-               goto out;
-       }
-
-       switch (obj->cache_level) {
-       case I915_CACHE_LLC:
-       case I915_CACHE_L3_LLC:
-               args->caching = I915_CACHING_CACHED;
-               break;
-
-       case I915_CACHE_WT:
-               args->caching = I915_CACHING_DISPLAY;
-               break;
-
-       default:
-               args->caching = I915_CACHING_NONE;
-               break;
-       }
-out:
-       rcu_read_unlock();
-       return err;
-}
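
For reference, the userspace side of this query goes through DRM_IOCTL_I915_GEM_GET_CACHING. A minimal libdrm-based sketch (the helper name is hypothetical; "fd" is assumed to be an open i915 device node and "handle" a valid GEM handle):

#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <i915_drm.h>

static int example_get_caching(int fd, uint32_t handle, uint32_t *caching)
{
	struct drm_i915_gem_caching arg;

	memset(&arg, 0, sizeof(arg));
	arg.handle = handle;

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_CACHING, &arg))
		return -errno;

	/* One of I915_CACHING_NONE, _CACHED or _DISPLAY, as mapped above. */
	*caching = arg.caching;
	return 0;
}
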
-
-int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
-                              struct drm_file *file)
-{
-       struct drm_i915_private *i915 = to_i915(dev);
-       struct drm_i915_gem_caching *args = data;
-       struct drm_i915_gem_object *obj;
-       enum i915_cache_level level;
-       int ret = 0;
-
-       switch (args->caching) {
-       case I915_CACHING_NONE:
-               level = I915_CACHE_NONE;
-               break;
-       case I915_CACHING_CACHED:
-               /*
-                * Due to a HW issue on BXT A stepping, GPU stores via a
-                * snooped mapping may leave stale data in a corresponding CPU
-                * cacheline, whereas normally such cachelines would get
-                * invalidated.
-                */
-               if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
-                       return -ENODEV;
-
-               level = I915_CACHE_LLC;
-               break;
-       case I915_CACHING_DISPLAY:
-               level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
-               break;
-       default:
-               return -EINVAL;
-       }
-
-       obj = i915_gem_object_lookup(file, args->handle);
-       if (!obj)
-               return -ENOENT;
-
-       /*
-        * The caching mode of a proxy object is handled by its generator and
-        * is not allowed to be changed by userspace.
-        */
-       if (i915_gem_object_is_proxy(obj)) {
-               ret = -ENXIO;
-               goto out;
-       }
-
-       if (obj->cache_level == level)
-               goto out;
-
-       ret = i915_gem_object_wait(obj,
-                                  I915_WAIT_INTERRUPTIBLE,
-                                  MAX_SCHEDULE_TIMEOUT);
-       if (ret)
-               goto out;
-
-       ret = i915_mutex_lock_interruptible(dev);
-       if (ret)
-               goto out;
-
-       ret = i915_gem_object_set_cache_level(obj, level);
-       mutex_unlock(&dev->struct_mutex);
-
-out:
-       i915_gem_object_put(obj);
-       return ret;
-}
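
The corresponding setter goes through DRM_IOCTL_I915_GEM_SET_CACHING and can fail with the errors mapped above (-EINVAL, -ENODEV, -ENOENT, -ENXIO). A minimal sketch, using the same includes as the GET example:

static int example_set_caching(int fd, uint32_t handle, uint32_t caching)
{
	struct drm_i915_gem_caching arg;

	memset(&arg, 0, sizeof(arg));
	arg.handle = handle;
	arg.caching = caching;	/* e.g. I915_CACHING_CACHED */

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg))
		return -errno;

	return 0;
}
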
-
-/*
- * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
- * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
- * (for pageflips). We only flush the caches while preparing the buffer for
- * display, the callers are responsible for frontbuffer flush.
- */
-struct i915_vma *
-i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
-                                    u32 alignment,
-                                    const struct i915_ggtt_view *view,
-                                    unsigned int flags)
-{
-       struct i915_vma *vma;
-       int ret;
-
-       lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-       /* Mark the global pin early so that we account for the
-        * display coherency whilst setting up the cache domains.
-        */
-       obj->pin_global++;
-
-       /* The display engine is not coherent with the LLC cache on gen6.  As
-        * a result, we make sure that the pinning that is about to occur is
-        * done with uncached PTEs. This is lowest common denominator for all
-        * chipsets.
-        *
-        * However for gen6+, we could do better by using the GFDT bit instead
-        * of uncaching, which would allow us to flush all the LLC-cached data
-        * with that bit in the PTE to main memory with just one PIPE_CONTROL.
-        */
-       ret = i915_gem_object_set_cache_level(obj,
-                                             HAS_WT(to_i915(obj->base.dev)) ?
-                                             I915_CACHE_WT : I915_CACHE_NONE);
-       if (ret) {
-               vma = ERR_PTR(ret);
-               goto err_unpin_global;
-       }
-
-       /* As the user may map the buffer once pinned in the display plane
-        * (e.g. libkms for the bootup splash), we have to ensure that we
-        * always use map_and_fenceable for all scanout buffers. However,
- * it may simply be too big to fit into the mappable aperture, in which case
-        * put it anyway and hope that userspace can cope (but always first
-        * try to preserve the existing ABI).
-        */
-       vma = ERR_PTR(-ENOSPC);
-       if ((flags & PIN_MAPPABLE) == 0 &&
-           (!view || view->type == I915_GGTT_VIEW_NORMAL))
-               vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
-                                              flags |
-                                              PIN_MAPPABLE |
-                                              PIN_NONBLOCK);
-       if (IS_ERR(vma))
-               vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
-       if (IS_ERR(vma))
-               goto err_unpin_global;
-
-       vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
-
-       __i915_gem_object_flush_for_display(obj);
-
-       /* It should now be out of any other write domains, and we can update
-        * the domain values for our changes.
-        */
-       obj->read_domains |= I915_GEM_DOMAIN_GTT;
-
-       return vma;
-
-err_unpin_global:
-       obj->pin_global--;
-       return vma;
-}
-
-void
-i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
-{
-       lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
-
-       if (WARN_ON(vma->obj->pin_global == 0))
-               return;
-
-       if (--vma->obj->pin_global == 0)
-               vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
-
-       /* Bump the LRU to try and avoid premature eviction whilst flipping  */
-       i915_gem_object_bump_inactive_ggtt(vma->obj);
-
-       i915_vma_unpin(vma);
-}
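
These two helpers are intended to be used as a pair around a scanout, under struct_mutex; the real callers live in the modesetting code and derive the alignment, view and PIN_* flags from the framebuffer. A condensed, hypothetical sketch of the pairing:

static int example_scanout_cycle(struct drm_i915_gem_object *obj, u32 align)
{
	struct i915_vma *vma;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	/* NULL selects the normal GGTT view; no extra PIN_* flags requested. */
	vma = i915_gem_object_pin_to_display_plane(obj, align, NULL, 0);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	/* ... program the plane and flush the frontbuffer here ... */

	i915_gem_object_unpin_from_display_plane(vma);
	return 0;
}
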
-
-/**
- * Moves a single object to the CPU read, and possibly write domain.
- * @obj: object to act on
- * @write: requesting write or read-only access
- *
- * This function returns when the move is complete, including waiting on
- * flushes to occur.
- */
-int
-i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
-{
-       int ret;
-
-       lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-       ret = i915_gem_object_wait(obj,
-                                  I915_WAIT_INTERRUPTIBLE |
-                                  I915_WAIT_LOCKED |
-                                  (write ? I915_WAIT_ALL : 0),
-                                  MAX_SCHEDULE_TIMEOUT);
-       if (ret)
-               return ret;
-
-       i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
-
-       /* Flush the CPU cache if it's still invalid. */
-       if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
-               i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
-               obj->read_domains |= I915_GEM_DOMAIN_CPU;
-       }
-
-       /* It should now be out of any other write domains, and we can update
-        * the domain values for our changes.
-        */
-       GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);
-
-       /* If we're writing through the CPU, then the GPU read domains will
-        * need to be invalidated at next use.
-        */
-       if (write)
-               __start_cpu_write(obj);
-
-       return 0;
-}
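
A typical use of the CPU-domain helper is to make an object coherent before a kernel-side CPU write. A hypothetical sketch (the zeroing is purely illustrative; i915_gem_object_pin_map()/unpin_map() are the existing mapping helpers):

static int example_cpu_clear(struct drm_i915_gem_object *obj)
{
	void *vaddr;
	int err;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	err = i915_gem_object_set_to_cpu_domain(obj, true);
	if (err)
		return err;

	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	memset(vaddr, 0, obj->base.size);

	i915_gem_object_unpin_map(obj);
	return 0;
}
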
-
 /* Throttle our rendering by waiting until the ring has completed our requests
  * emitted over 20 msec ago.
  *
index 908fddcc57c3390524a40f9e3a4f0400da08400a..699f3f180d8a395dca08ed90ba303547552a1dfe 100644 (file)
@@ -1026,7 +1026,7 @@ static void reloc_cache_reset(struct reloc_cache *cache)
                        mb();
 
                kunmap_atomic(vaddr);
-               i915_gem_obj_finish_shmem_access((struct drm_i915_gem_object *)cache->node.mm);
+               i915_gem_object_finish_access((struct drm_i915_gem_object *)cache->node.mm);
        } else {
                wmb();
                io_mapping_unmap_atomic((void __iomem *)vaddr);
@@ -1058,7 +1058,7 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
                unsigned int flushes;
                int err;
 
-               err = i915_gem_obj_prepare_shmem_write(obj, &flushes);
+               err = i915_gem_object_prepare_write(obj, &flushes);
                if (err)
                        return ERR_PTR(err);
 
index 9440024c763f3a61372ede0ee9bbc41f2886e33f..f3b42b026fff49ad6c39d3e8e01d40f47b0f6db5 100644 (file)
@@ -84,7 +84,7 @@ static int render_state_setup(struct intel_render_state *so,
        u32 *d;
        int ret;
 
-       ret = i915_gem_obj_prepare_shmem_write(so->obj, &needs_clflush);
+       ret = i915_gem_object_prepare_write(so->obj, &needs_clflush);
        if (ret)
                return ret;
 
@@ -166,7 +166,7 @@ static int render_state_setup(struct intel_render_state *so,
 
        ret = 0;
 out:
-       i915_gem_obj_finish_shmem_access(so->obj);
+       i915_gem_object_finish_access(so->obj);
        return ret;
 
 err:
index ce4ec87698f699648dbbc70e8a4eacdfbaa17208..b22b8249dfbd43bacfa2fb4a77115e37980b9412 100644 (file)
@@ -1017,7 +1017,7 @@ static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
        unsigned long n;
        int err;
 
-       err = i915_gem_obj_prepare_shmem_read(obj, &needs_flush);
+       err = i915_gem_object_prepare_read(obj, &needs_flush);
        if (err)
                return err;
 
@@ -1038,7 +1038,7 @@ static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
                kunmap_atomic(ptr);
        }
 
-       i915_gem_obj_finish_shmem_access(obj);
+       i915_gem_object_finish_access(obj);
 
        return err;
 }
index 046a38743152c4b66de08339c2957dcf1920dab9..cb25b5fc8027b562693d2247670ac281873cd9c9 100644 (file)
@@ -37,7 +37,7 @@ static int cpu_set(struct drm_i915_gem_object *obj,
        u32 *cpu;
        int err;
 
-       err = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
+       err = i915_gem_object_prepare_write(obj, &needs_clflush);
        if (err)
                return err;
 
@@ -54,7 +54,7 @@ static int cpu_set(struct drm_i915_gem_object *obj,
                drm_clflush_virt_range(cpu, sizeof(*cpu));
 
        kunmap_atomic(map);
-       i915_gem_obj_finish_shmem_access(obj);
+       i915_gem_object_finish_access(obj);
 
        return 0;
 }
@@ -69,7 +69,7 @@ static int cpu_get(struct drm_i915_gem_object *obj,
        u32 *cpu;
        int err;
 
-       err = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
+       err = i915_gem_object_prepare_read(obj, &needs_clflush);
        if (err)
                return err;
 
@@ -83,7 +83,7 @@ static int cpu_get(struct drm_i915_gem_object *obj,
        *v = *cpu;
 
        kunmap_atomic(map);
-       i915_gem_obj_finish_shmem_access(obj);
+       i915_gem_object_finish_access(obj);
 
        return 0;
 }
index 34ac5cc6d59fb54bd30e7026e5b6a2608fb83d83..c69c6d9a998b7b07245b711ee9dd830d802784e5 100644 (file)
@@ -354,7 +354,7 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
        unsigned int n, m, need_flush;
        int err;
 
-       err = i915_gem_obj_prepare_shmem_write(obj, &need_flush);
+       err = i915_gem_object_prepare_write(obj, &need_flush);
        if (err)
                return err;
 
@@ -369,7 +369,7 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
                kunmap_atomic(map);
        }
 
-       i915_gem_obj_finish_shmem_access(obj);
+       i915_gem_object_finish_access(obj);
        obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
        obj->write_domain = 0;
        return 0;
@@ -381,7 +381,7 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj,
        unsigned int n, m, needs_flush;
        int err;
 
-       err = i915_gem_obj_prepare_shmem_read(obj, &needs_flush);
+       err = i915_gem_object_prepare_read(obj, &needs_flush);
        if (err)
                return err;
 
@@ -419,7 +419,7 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj,
                        break;
        }
 
-       i915_gem_obj_finish_shmem_access(obj);
+       i915_gem_object_finish_access(obj);
        return err;
 }
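
All of the call sites converted above follow the same pattern with the renamed helpers: prepare the object for CPU access, kmap and touch the pages (flushing as instructed by the prepare call), then finish. A condensed, hypothetical sketch using the new names:

static int example_fill_first_page(struct drm_i915_gem_object *obj, u32 value)
{
	unsigned int needs_clflush;
	unsigned int i;
	u32 *map;
	int err;

	err = i915_gem_object_prepare_write(obj, &needs_clflush);
	if (err)
		return err;

	map = kmap_atomic(i915_gem_object_get_page(obj, 0));
	if (needs_clflush & CLFLUSH_BEFORE)
		drm_clflush_virt_range(map, PAGE_SIZE);

	for (i = 0; i < PAGE_SIZE / sizeof(*map); i++)
		map[i] = value;

	if (needs_clflush & CLFLUSH_AFTER)
		drm_clflush_virt_range(map, PAGE_SIZE);
	kunmap_atomic(map);

	i915_gem_object_finish_access(obj);
	return 0;
}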