/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include "display/intel_frontbuffer.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
#include "i915_vma.h"
#include "i915_gem_lmem.h"

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
	/*
	 * We manually flush the CPU domain so that we can override and
	 * force the flush for the display, and perform it asynchronously.
	 */
	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
	if (obj->cache_dirty)
		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
	obj->write_domain = 0;
}
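
/*
 * Flush any pending CPU writes for an object that is currently in use as a
 * framebuffer; objects that are not framebuffers are left untouched.
 */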
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
	if (!i915_gem_object_is_framebuffer(obj))
		return;

	i915_gem_object_lock(obj);
	__i915_gem_object_flush_for_display(obj);
	i915_gem_object_unlock(obj);
}
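
/*
 * Note for the domain-setting helpers below: callers are expected to hold the
 * object lock (see the assert_object_held() checks). An illustrative sketch
 * of typical usage:
 *
 *	i915_gem_object_lock(obj);
 *	err = i915_gem_object_set_to_wc_domain(obj, true);
 *	i915_gem_object_unlock(obj);
 */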

/**
 * Moves a single object to the WC read, and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_WC)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * WC domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_WC;
	if (write) {
		obj->read_domains = I915_GEM_DOMAIN_WC;
		obj->write_domain = I915_GEM_DOMAIN_WC;
		obj->mm.dirty = true;
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * Moves a single object to the GTT read, and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * GTT domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		struct i915_vma *vma;

		obj->read_domains = I915_GEM_DOMAIN_GTT;
		obj->write_domain = I915_GEM_DOMAIN_GTT;
		obj->mm.dirty = true;

		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj)
			if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
				i915_vma_set_ggtt_write(vma);
		spin_unlock(&obj->vma.lock);
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}
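
/*
 * Note on the GTT write path above: tagging each bound GGTT VMA with
 * i915_vma_set_ggtt_write() records that writes may be pending through the
 * aperture, so they can be flushed before the mapping is reused or unbound.
 */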

/**
 * Changes the cache-level of an object across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent,
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
{
	struct i915_vma *vma;
	int ret;

	assert_object_held(obj);

	if (obj->cache_level == cache_level)
		return 0;

	/* Inspect the list of currently bound VMA and unbind any that would
	 * be invalid given the new cache-level. This is principally to
	 * catch the issue of the CS prefetch crossing page boundaries and
	 * reading an invalid PTE on older architectures.
	 */
restart:
	list_for_each_entry(vma, &obj->vma.list, obj_link) {
		if (!drm_mm_node_allocated(&vma->node))
			continue;

		if (i915_vma_is_pinned(vma)) {
			DRM_DEBUG("can not change the cache level of pinned objects\n");
			return -EBUSY;
		}

		if (!i915_vma_is_closed(vma) &&
		    i915_gem_valid_gtt_space(vma, cache_level))
			continue;

		ret = i915_vma_unbind(vma);
		if (ret)
			return ret;

		/* As unbinding may affect other elements in the
		 * obj->vma_list (due to side-effects from retiring
		 * an active vma), play safe and restart the iterator.
		 */
		goto restart;
	}

	/* We can reuse the existing drm_mm nodes but need to change the
	 * cache-level on the PTE. We could simply unbind them all and
	 * rebind with the correct cache-level on next use. However since
	 * we already have a valid slot, dma mapping, pages etc, we may as
	 * well rewrite the PTE in the belief that doing so tramples upon
	 * less state and so involves less work.
	 */
	if (atomic_read(&obj->bind_count)) {
		struct drm_i915_private *i915 = to_i915(obj->base.dev);

		/* Before we change the PTE, the GPU must not be accessing it.
		 * If we wait upon the object, we know that all the bound
		 * VMA are no longer active.
		 */
		ret = i915_gem_object_wait(obj,
					   I915_WAIT_INTERRUPTIBLE |
					   I915_WAIT_ALL,
					   MAX_SCHEDULE_TIMEOUT);
		if (ret)
			return ret;

		if (!HAS_LLC(i915) && cache_level != I915_CACHE_NONE) {
			intel_wakeref_t wakeref =
				intel_runtime_pm_get(&i915->runtime_pm);

			/*
			 * Access to snoopable pages through the GTT is
			 * incoherent and on some machines causes a hard
			 * lockup. Relinquish the CPU mmapping to force
			 * userspace to refault in the pages and we can
			 * then double check if the GTT mapping is still
			 * valid for that pointer access.
			 */
			ret = mutex_lock_interruptible(&i915->ggtt.vm.mutex);
			if (ret) {
				intel_runtime_pm_put(&i915->runtime_pm,
						     wakeref);
				return ret;
			}

			if (obj->userfault_count)
				__i915_gem_object_release_mmap(obj);

			/*
			 * As we no longer need a fence for GTT access,
			 * we can relinquish it now (and so prevent having
			 * to steal a fence from someone else on the next
			 * fence request). Note GPU activity would have
			 * dropped the fence as all snoopable access is
			 * supposed to be linear.
			 */
			for_each_ggtt_vma(vma, obj) {
				ret = i915_vma_revoke_fence(vma);
				if (ret)
					break;
			}
			mutex_unlock(&i915->ggtt.vm.mutex);
			intel_runtime_pm_put(&i915->runtime_pm, wakeref);
			if (ret)
				return ret;
		} else {
			/*
			 * We either have incoherent backing store and
			 * so no GTT access or the architecture is fully
			 * coherent. In such cases, existing GTT mmaps
			 * ignore the cache bit in the PTE and we can
			 * rewrite it without confusing the GPU or having
			 * to force userspace to fault back in its mmaps.
			 */
		}

		list_for_each_entry(vma, &obj->vma.list, obj_link) {
			if (!drm_mm_node_allocated(&vma->node))
				continue;

			/* Wait for an earlier async bind, need to rewrite it */
			ret = i915_vma_sync(vma);
			if (ret)
				return ret;

			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE, NULL);
			if (ret)
				return ret;
		}
	}

	list_for_each_entry(vma, &obj->vma.list, obj_link) {
		if (i915_vm_has_cache_coloring(vma->vm))
			vma->node.color = cache_level;
	}
	i915_gem_object_set_cache_coherency(obj, cache_level);
	obj->cache_dirty = true; /* Always invalidate stale cachelines */

	return 0;
}
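
/*
 * DRM_IOCTL_I915_GEM_GET_CACHING: report the current caching mode of a GEM
 * object to userspace as one of I915_CACHING_{NONE,CACHED,DISPLAY}.
 */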
int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	int err = 0;

	rcu_read_lock();
	obj = i915_gem_object_lookup_rcu(file, args->handle);
	if (!obj) {
		err = -ENOENT;
		goto out;
	}

	switch (obj->cache_level) {
	case I915_CACHE_LLC:
	case I915_CACHE_L3_LLC:
		args->caching = I915_CACHING_CACHED;
		break;

	case I915_CACHE_WT:
		args->caching = I915_CACHING_DISPLAY;
		break;

	default:
		args->caching = I915_CACHING_NONE;
		break;
	}
out:
	rcu_read_unlock();
	return err;
}
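
/*
 * DRM_IOCTL_I915_GEM_SET_CACHING: select the caching mode for a GEM object.
 * A minimal userspace sketch (illustrative only, assuming libdrm's drmIoctl()
 * wrapper and an already-created GEM handle):
 *
 *	struct drm_i915_gem_caching arg = {
 *		.handle = handle,
 *		.caching = I915_CACHING_CACHED,
 *	};
 *	err = drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
 */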
int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	enum i915_cache_level level;
	int ret = 0;

	switch (args->caching) {
	case I915_CACHING_NONE:
		level = I915_CACHE_NONE;
		break;
	case I915_CACHING_CACHED:
		/*
		 * Due to a HW issue on BXT A stepping, GPU stores via a
		 * snooped mapping may leave stale data in a corresponding CPU
		 * cacheline, whereas normally such cachelines would get
		 * invalidated.
		 */
		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
			return -ENODEV;

		level = I915_CACHE_LLC;
		break;
	case I915_CACHING_DISPLAY:
		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * The caching mode of a proxy object is handled by its generator and
	 * is not allowed to be changed by userspace.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		ret = -ENXIO;
		goto out;
	}

	if (obj->cache_level == level)
		goto out;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto out;

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret == 0) {
		ret = i915_gem_object_set_cache_level(obj, level);
		i915_gem_object_unlock(obj);
	}

out:
	i915_gem_object_put(obj);
	return ret;
}

/*
 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
 * (for pageflips). We only flush the caches while preparing the buffer for
 * display; the callers are responsible for the frontbuffer flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
				     u32 alignment,
				     const struct i915_ggtt_view *view,
				     unsigned int flags)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_vma *vma;
	int ret;

	assert_object_held(obj);

	/* Frame buffer must be in LMEM (no migration yet) */
	if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
		return ERR_PTR(-EINVAL);

	/*
	 * The display engine is not coherent with the LLC cache on gen6. As
	 * a result, we make sure that the pinning that is about to occur is
	 * done with uncached PTEs. This is the lowest common denominator for
	 * all chipsets.
	 *
	 * However for gen6+, we could do better by using the GFDT bit instead
	 * of uncaching, which would allow us to flush all the LLC-cached data
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
	 */
	ret = i915_gem_object_set_cache_level(obj,
					      HAS_WT(i915) ?
					      I915_CACHE_WT : I915_CACHE_NONE);
	if (ret)
		return ERR_PTR(ret);

	/*
	 * As the user may map the buffer once pinned in the display plane
	 * (e.g. libkms for the bootup splash), we have to ensure that we
	 * always use map_and_fenceable for all scanout buffers. However,
	 * it may simply be too big to fit into mappable, in which case
	 * put it anyway and hope that userspace can cope (but always first
	 * try to preserve the existing ABI).
	 */
	vma = ERR_PTR(-ENOSPC);
	if ((flags & PIN_MAPPABLE) == 0 &&
	    (!view || view->type == I915_GGTT_VIEW_NORMAL))
		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
					       flags | PIN_MAPPABLE |
					       PIN_NONBLOCK);
	if (IS_ERR(vma))
		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
	if (IS_ERR(vma))
		return vma;

	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);

	__i915_gem_object_flush_for_display(obj);

	/*
	 * It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	obj->read_domains |= I915_GEM_DOMAIN_GTT;

	return vma;
}
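
/*
 * Move all GGTT VMA of the object to the tail of their bound list, and the
 * object itself to the tail of the shrink list, so that it is treated as
 * recently used and is less likely to be evicted or shrunk.
 */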
static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_vma *vma;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));

	mutex_lock(&i915->ggtt.vm.mutex);
	for_each_ggtt_vma(vma, obj) {
		if (!drm_mm_node_allocated(&vma->node))
			continue;

		GEM_BUG_ON(vma->vm != &i915->ggtt.vm);
		list_move_tail(&vma->vm_link, &vma->vm->bound_list);
	}
	mutex_unlock(&i915->ggtt.vm.mutex);

	if (i915_gem_object_is_shrinkable(obj)) {
		unsigned long flags;

		spin_lock_irqsave(&i915->mm.obj_lock, flags);

		if (obj->mm.madv == I915_MADV_WILLNEED &&
		    !atomic_read(&obj->mm.shrink_pin))
			list_move_tail(&obj->mm.link, &i915->mm.shrink_list);

		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
	}
}
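
/*
 * Counterpart to i915_gem_object_pin_to_display_plane(): bump the object in
 * the LRU before dropping the pin so that the scanout buffer is not evicted
 * prematurely right after a flip.
 */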
void
i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
{
	struct drm_i915_gem_object *obj = vma->obj;

	assert_object_held(obj);

	/* Bump the LRU to try and avoid premature eviction whilst flipping */
	i915_gem_object_bump_inactive_ggtt(obj);

	i915_vma_unpin(vma);
}

/**
 * Moves a single object to the CPU read, and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* Flush the CPU cache if it's still invalid. */
	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		obj->read_domains |= I915_GEM_DOMAIN_CPU;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write)
		__start_cpu_write(obj);

	return 0;
}
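
/*
 * The set-domain ioctl below is the userspace entry point for the helpers
 * above. An illustrative userspace sketch (not part of this file), assuming
 * libdrm's drmIoctl() wrapper and an existing GEM handle:
 *
 *	struct drm_i915_gem_set_domain arg = {
 *		.handle = handle,
 *		.read_domains = I915_GEM_DOMAIN_CPU,
 *		.write_domain = I915_GEM_DOMAIN_CPU,
 *	};
 *	err = drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg);
 */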

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	u32 read_domains = args->read_domains;
	u32 write_domain = args->write_domain;
	int err;

	/* Only handle setting domains to types used by the CPU. */
	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/*
	 * Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain && read_domains != write_domain)
		return -EINVAL;

	if (!read_domains)
		return 0;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Already in the desired write domain? Nothing for us to do!
	 *
	 * We apply a little bit of cunning here to catch a broader set of
	 * no-ops. If obj->write_domain is set, we must be in the same
	 * obj->read_domains, and only that domain. Therefore, if that
	 * obj->write_domain matches the request read_domains, we are
	 * already in the same read/write domain and can skip the operation,
	 * without having to further check the requested write_domain.
	 */
	if (READ_ONCE(obj->write_domain) == read_domains) {
		err = 0;
		goto out;
	}

	/*
	 * Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   (write_domain ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (err)
		goto out;

	/*
	 * Proxy objects do not control access to the backing storage, ergo
	 * they cannot be used as a means to manipulate the cache domain
	 * tracking for that backing storage. The proxy object is always
	 * considered to be outside of any cache domain.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto out;
	}

	/*
	 * Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out;

	err = i915_gem_object_lock_interruptible(obj);
	if (err)
		goto out_unpin;

	if (read_domains & I915_GEM_DOMAIN_WC)
		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
	else if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

	/* And bump the LRU for this access */
	i915_gem_object_bump_inactive_ggtt(obj);

	i915_gem_object_unlock(obj);

	if (write_domain)
		intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CPU);

out_unpin:
	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return err;
}
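
/*
 * The prepare_read()/prepare_write() helpers below are used by callers such
 * as the pread/pwrite paths: they return with the object's pages pinned and
 * report, via *needs_clflush, whether the caller must clflush before reading
 * (CLFLUSH_BEFORE) and/or after writing (CLFLUSH_AFTER) to stay coherent.
 */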

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
				 unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		return ret;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto err_unlock;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err_unlock;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, false);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu read domain, set ourself into the gtt
	 * read domain and manually flush cachelines (if required). This
	 * optimizes for the case when the gpu will dirty the data
	 * anyway again before the next pread happens.
	 */
	if (!obj->cache_dirty &&
	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
		*needs_clflush = CLFLUSH_BEFORE;

out:
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
err_unlock:
	i915_gem_object_unlock(obj);
	return ret;
}
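
/*
 * As i915_gem_object_prepare_read(), but for CPU writes: the object is also
 * marked dirty and CLFLUSH_AFTER may be requested so that dirty cachelines
 * are written back after the access.
 */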
int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
				  unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		return ret;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto err_unlock;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err_unlock;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu write domain, set ourself into the
	 * gtt write domain and manually flush cachelines (as required).
	 * This optimizes for the case when the gpu will use the data
	 * right away and we therefore have to clflush anyway.
	 */
	if (!obj->cache_dirty) {
		*needs_clflush |= CLFLUSH_AFTER;

		/*
		 * Same trick applies to invalidate partially written
		 * cachelines read before writing.
		 */
		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
			*needs_clflush |= CLFLUSH_BEFORE;
	}

out:
	intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CPU);
	obj->mm.dirty = true;
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
err_unlock:
	i915_gem_object_unlock(obj);
	return ret;
}