drm/i915: Avoid calling i915_gem_object_unbind holding object lock
drivers/gpu/drm/i915/gem/i915_gem_domain.c
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include "display/intel_frontbuffer.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
#include "i915_vma.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
	/*
	 * We manually flush the CPU domain so that we can override and
	 * force the flush for the display, and perform it asynchronously.
	 */
	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
	if (obj->cache_dirty)
		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
	obj->write_domain = 0;
}

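/*
 * Flush a framebuffer object so that its contents are visible to the
 * display engine. This variant takes the object lock around the flush
 * itself and is a no-op for non-framebuffer objects; it is used, for
 * example, after pinning a scanout buffer, see
 * i915_gem_object_pin_to_display_plane() below.
 */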
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
	if (!i915_gem_object_is_framebuffer(obj))
		return;

	i915_gem_object_lock(obj);
	__i915_gem_object_flush_for_display(obj);
	i915_gem_object_unlock(obj);
}

/**
 * Moves a single object to the WC read, and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_WC)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * WC domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_WC;
	if (write) {
		obj->read_domains = I915_GEM_DOMAIN_WC;
		obj->write_domain = I915_GEM_DOMAIN_WC;
		obj->mm.dirty = true;
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}
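
/*
 * Illustrative calling pattern for the set-to-domain helpers in this file
 * (a sketch, not an in-tree call site): the object lock must already be
 * held, e.g.
 *
 *	err = i915_gem_object_lock_interruptible(obj);
 *	if (err)
 *		return err;
 *	err = i915_gem_object_set_to_wc_domain(obj, true);
 *	i915_gem_object_unlock(obj);
 *
 * The GTT and CPU variants below follow the same pattern; see
 * i915_gem_set_domain_ioctl() for an in-tree example.
 */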

/**
 * Moves a single object to the GTT read, and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * GTT domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		struct i915_vma *vma;

		obj->read_domains = I915_GEM_DOMAIN_GTT;
		obj->write_domain = I915_GEM_DOMAIN_GTT;
		obj->mm.dirty = true;

		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj)
			if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
				i915_vma_set_ggtt_write(vma);
		spin_unlock(&obj->vma.lock);
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * Changes the cache-level of an object across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTTs and the contents of the backing storage will be coherent
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is, if the object is currently
 * on the scanout it will be set to uncached (or an equivalent display
 * cache-coherency mode) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
{
	int ret;

	if (obj->cache_level == cache_level)
		return 0;

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		return ret;

	/* Always invalidate stale cachelines */
	if (obj->cache_level != cache_level) {
		i915_gem_object_set_cache_coherency(obj, cache_level);
		obj->cache_dirty = true;
	}

	i915_gem_object_unlock(obj);

	/* The cache-level will be applied when each vma is rebound. */
	return i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE);
}
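
/*
 * In-file example: i915_gem_object_pin_to_display_plane() below uses this
 * helper to move a scanout buffer to I915_CACHE_WT (or I915_CACHE_NONE on
 * platforms without write-through caching) before pinning it for display.
 */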

int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	int err = 0;

	rcu_read_lock();
	obj = i915_gem_object_lookup_rcu(file, args->handle);
	if (!obj) {
		err = -ENOENT;
		goto out;
	}

	switch (obj->cache_level) {
	case I915_CACHE_LLC:
	case I915_CACHE_L3_LLC:
		args->caching = I915_CACHING_CACHED;
		break;

	case I915_CACHE_WT:
		args->caching = I915_CACHING_DISPLAY;
		break;

	default:
		args->caching = I915_CACHING_NONE;
		break;
	}
out:
	rcu_read_unlock();
	return err;
}

int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	enum i915_cache_level level;
	int ret = 0;

	switch (args->caching) {
	case I915_CACHING_NONE:
		level = I915_CACHE_NONE;
		break;
	case I915_CACHING_CACHED:
		/*
		 * Due to a HW issue on BXT A stepping, GPU stores via a
		 * snooped mapping may leave stale data in a corresponding CPU
		 * cacheline, whereas normally such cachelines would get
		 * invalidated.
		 */
		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
			return -ENODEV;

		level = I915_CACHE_LLC;
		break;
	case I915_CACHING_DISPLAY:
		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * The caching mode of a proxy object is handled by its generator and
	 * is not allowed to be changed by userspace.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		ret = -ENXIO;
		goto out;
	}

	ret = i915_gem_object_set_cache_level(obj, level);

out:
	i915_gem_object_put(obj);
	return ret;
}
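
/*
 * Illustrative userspace usage of the caching ioctls (a sketch, not part of
 * this file), assuming a DRM fd and a GEM handle obtained elsewhere:
 *
 *	struct drm_i915_gem_caching arg = {
 *		.handle = handle,
 *		.caching = I915_CACHING_CACHED,
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_CACHING, &arg);
 *
 * after which arg.caching holds one of the I915_CACHING_* values above.
 */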

/*
 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
 * (for pageflips). We only flush the caches while preparing the buffer for
 * display; the callers are responsible for the frontbuffer flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
				     u32 alignment,
				     const struct i915_ggtt_view *view,
				     unsigned int flags)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_vma *vma;
	int ret;

	/* Frame buffer must be in LMEM (no migration yet) */
	if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
		return ERR_PTR(-EINVAL);

	/*
	 * The display engine is not coherent with the LLC cache on gen6. As
	 * a result, we make sure that the pinning that is about to occur is
	 * done with uncached PTEs. This is the lowest common denominator for
	 * all chipsets.
	 *
	 * However for gen6+, we could do better by using the GFDT bit instead
	 * of uncaching, which would allow us to flush all the LLC-cached data
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
	 */
	ret = i915_gem_object_set_cache_level(obj,
					      HAS_WT(i915) ?
					      I915_CACHE_WT : I915_CACHE_NONE);
	if (ret)
		return ERR_PTR(ret);

	/*
	 * As the user may map the buffer once pinned in the display plane
	 * (e.g. libkms for the bootup splash), we have to ensure that we
	 * always use map_and_fenceable for all scanout buffers. However,
	 * it may simply be too big to fit into mappable, in which case
	 * put it anyway and hope that userspace can cope (but always first
	 * try to preserve the existing ABI).
	 */
	vma = ERR_PTR(-ENOSPC);
	if ((flags & PIN_MAPPABLE) == 0 &&
	    (!view || view->type == I915_GGTT_VIEW_NORMAL))
		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
					       flags |
					       PIN_MAPPABLE |
					       PIN_NONBLOCK);
	if (IS_ERR(vma))
		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
	if (IS_ERR(vma))
		return vma;

	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);

	i915_gem_object_flush_if_display(obj);

	return vma;
}

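/*
 * Mark all bound GGTT VMA of the object (and, if it is shrinkable, the
 * object itself) as recently used by moving them to the tail of their
 * respective LRU lists, so that they become the last candidates for
 * eviction or shrinking.
 */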
static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_vma *vma;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
	if (!atomic_read(&obj->bind_count))
		return;

	mutex_lock(&i915->ggtt.vm.mutex);
	spin_lock(&obj->vma.lock);
	for_each_ggtt_vma(vma, obj) {
		if (!drm_mm_node_allocated(&vma->node))
			continue;

		GEM_BUG_ON(vma->vm != &i915->ggtt.vm);
		list_move_tail(&vma->vm_link, &vma->vm->bound_list);
	}
	spin_unlock(&obj->vma.lock);
	mutex_unlock(&i915->ggtt.vm.mutex);

	if (i915_gem_object_is_shrinkable(obj)) {
		unsigned long flags;

		spin_lock_irqsave(&i915->mm.obj_lock, flags);

		if (obj->mm.madv == I915_MADV_WILLNEED &&
		    !atomic_read(&obj->mm.shrink_pin))
			list_move_tail(&obj->mm.link, &i915->mm.shrink_list);

		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
	}
}

void
i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
{
	struct drm_i915_gem_object *obj = vma->obj;

	assert_object_held(obj);

	/* Bump the LRU to try and avoid premature eviction whilst flipping */
	i915_gem_object_bump_inactive_ggtt(obj);

	i915_vma_unpin(vma);
}

/**
 * Moves a single object to the CPU read, and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* Flush the CPU cache if it's still invalid. */
	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		obj->read_domains |= I915_GEM_DOMAIN_CPU;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write)
		__start_cpu_write(obj);

	return 0;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	u32 read_domains = args->read_domains;
	u32 write_domain = args->write_domain;
	int err;

	/* Only handle setting domains to types used by the CPU. */
	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/*
	 * Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain && read_domains != write_domain)
		return -EINVAL;

	if (!read_domains)
		return 0;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Already in the desired write domain? Nothing for us to do!
	 *
	 * We apply a little bit of cunning here to catch a broader set of
	 * no-ops. If obj->write_domain is set, we must be in the same
	 * obj->read_domains, and only that domain. Therefore, if that
	 * obj->write_domain matches the request read_domains, we are
	 * already in the same read/write domain and can skip the operation,
	 * without having to further check the requested write_domain.
	 */
	if (READ_ONCE(obj->write_domain) == read_domains) {
		err = 0;
		goto out;
	}

	/*
	 * Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   (write_domain ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (err)
		goto out;

	/*
	 * Proxy objects do not control access to the backing storage, ergo
	 * they cannot be used as a means to manipulate the cache domain
	 * tracking for that backing storage. The proxy object is always
	 * considered to be outside of any cache domain.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto out;
	}

	/*
	 * Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out;

	err = i915_gem_object_lock_interruptible(obj);
	if (err)
		goto out_unpin;

	if (read_domains & I915_GEM_DOMAIN_WC)
		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
	else if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

	/* And bump the LRU for this access */
	i915_gem_object_bump_inactive_ggtt(obj);

	i915_gem_object_unlock(obj);

	if (write_domain)
		intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CPU);

out_unpin:
	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return err;
}
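
/*
 * Illustrative userspace usage of the set-domain ioctl (a sketch, not part
 * of this file): preparing a buffer for CPU writes through an mmap:
 *
 *	struct drm_i915_gem_set_domain arg = {
 *		.handle = handle,
 *		.read_domains = I915_GEM_DOMAIN_CPU,
 *		.write_domain = I915_GEM_DOMAIN_CPU,
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg);
 */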

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
				 unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		return ret;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto err_unlock;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err_unlock;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, false);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu read domain, set ourself into the gtt
	 * read domain and manually flush cachelines (if required). This
	 * optimizes for the case when the gpu will dirty the data
	 * anyway again before the next pread happens.
	 */
	if (!obj->cache_dirty &&
	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
		*needs_clflush = CLFLUSH_BEFORE;

out:
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
err_unlock:
	i915_gem_object_unlock(obj);
	return ret;
}
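
/*
 * Typical calling pattern for the prepare helpers (a sketch; the real users,
 * e.g. the shmem pread/pwrite paths, live outside this file):
 *
 *	unsigned int needs_clflush;
 *	int err;
 *
 *	err = i915_gem_object_prepare_read(obj, &needs_clflush);
 *	if (err)
 *		return err;
 *
 *	... access the pages, flushing cachelines as indicated by
 *	    needs_clflush (CLFLUSH_BEFORE / CLFLUSH_AFTER) ...
 *
 *	i915_gem_object_finish_access(obj);
 *
 * On success the object is returned locked with its pages pinned; the finish
 * helper (assumed here to be i915_gem_object_finish_access()) drops both
 * again. i915_gem_object_prepare_write() below follows the same pattern.
 */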

int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
				  unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		return ret;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto err_unlock;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err_unlock;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu write domain, set ourself into the
	 * gtt write domain and manually flush cachelines (as required).
	 * This optimizes for the case when the gpu will use the data
	 * right away and we therefore have to clflush anyway.
	 */
	if (!obj->cache_dirty) {
		*needs_clflush |= CLFLUSH_AFTER;

		/*
		 * Same trick applies to invalidate partially written
		 * cachelines read before writing.
		 */
		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
			*needs_clflush |= CLFLUSH_BEFORE;
	}

out:
	intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CPU);
	obj->mm.dirty = true;
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
err_unlock:
	i915_gem_object_unlock(obj);
	return ret;
}