1 /*
2  * Copyright © 2008-2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  */
27
28 #include <drm/drm_vma_manager.h>
29 #include <drm/drm_pci.h>
30 #include <drm/i915_drm.h>
31 #include <linux/dma-fence-array.h>
32 #include <linux/kthread.h>
33 #include <linux/reservation.h>
34 #include <linux/shmem_fs.h>
35 #include <linux/slab.h>
36 #include <linux/stop_machine.h>
37 #include <linux/swap.h>
38 #include <linux/pci.h>
39 #include <linux/dma-buf.h>
40 #include <linux/mman.h>
41
42 #include "gem/i915_gem_ioctls.h"
43 #include "gt/intel_engine_pm.h"
44 #include "gt/intel_gt_pm.h"
45 #include "gt/intel_mocs.h"
46 #include "gt/intel_reset.h"
47 #include "gt/intel_workarounds.h"
48
49 #include "i915_drv.h"
50 #include "i915_gem_clflush.h"
51 #include "i915_gemfs.h"
52 #include "i915_gem_pm.h"
53 #include "i915_trace.h"
54 #include "i915_vgpu.h"
55
56 #include "intel_display.h"
57 #include "intel_drv.h"
58 #include "intel_frontbuffer.h"
59 #include "intel_pm.h"
60
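   /*
    * Reserve a node in the CPU-visible (mappable) range of the GGTT. The
    * pread/pwrite fallback paths below use this as a temporary window when
    * an object cannot be pinned into the aperture directly.
    */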
61 static int
62 insert_mappable_node(struct i915_ggtt *ggtt,
63                      struct drm_mm_node *node, u32 size)
64 {
65         memset(node, 0, sizeof(*node));
66         return drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
67                                            size, 0, I915_COLOR_UNEVICTABLE,
68                                            0, ggtt->mappable_end,
69                                            DRM_MM_INSERT_LOW);
70 }
71
72 static void
73 remove_mappable_node(struct drm_mm_node *node)
74 {
75         drm_mm_remove_node(node);
76 }
77
78 int
79 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
80                             struct drm_file *file)
81 {
82         struct i915_ggtt *ggtt = &to_i915(dev)->ggtt;
83         struct drm_i915_gem_get_aperture *args = data;
84         struct i915_vma *vma;
85         u64 pinned;
86
87         mutex_lock(&ggtt->vm.mutex);
88
89         pinned = ggtt->vm.reserved;
90         list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
91                 if (i915_vma_is_pinned(vma))
92                         pinned += vma->node.size;
93
94         mutex_unlock(&ggtt->vm.mutex);
95
96         args->aper_size = ggtt->vm.total;
97         args->aper_available_size = args->aper_size - pinned;
98
99         return 0;
100 }
101
102 static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
103 {
104         struct address_space *mapping = obj->base.filp->f_mapping;
105         drm_dma_handle_t *phys;
106         struct sg_table *st;
107         struct scatterlist *sg;
108         char *vaddr;
109         int i;
110         int err;
111
112         if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
113                 return -EINVAL;
114
115         /* Always aligning to the object size allows a single allocation
116          * to handle all possible callers, and given typical object sizes,
117          * the alignment of the buddy allocation will naturally match.
118          */
119         phys = drm_pci_alloc(obj->base.dev,
120                              roundup_pow_of_two(obj->base.size),
121                              roundup_pow_of_two(obj->base.size));
122         if (!phys)
123                 return -ENOMEM;
124
125         vaddr = phys->vaddr;
126         for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
127                 struct page *page;
128                 char *src;
129
130                 page = shmem_read_mapping_page(mapping, i);
131                 if (IS_ERR(page)) {
132                         err = PTR_ERR(page);
133                         goto err_phys;
134                 }
135
136                 src = kmap_atomic(page);
137                 memcpy(vaddr, src, PAGE_SIZE);
138                 drm_clflush_virt_range(vaddr, PAGE_SIZE);
139                 kunmap_atomic(src);
140
141                 put_page(page);
142                 vaddr += PAGE_SIZE;
143         }
144
145         i915_gem_chipset_flush(to_i915(obj->base.dev));
146
147         st = kmalloc(sizeof(*st), GFP_KERNEL);
148         if (!st) {
149                 err = -ENOMEM;
150                 goto err_phys;
151         }
152
153         if (sg_alloc_table(st, 1, GFP_KERNEL)) {
154                 kfree(st);
155                 err = -ENOMEM;
156                 goto err_phys;
157         }
158
159         sg = st->sgl;
160         sg->offset = 0;
161         sg->length = obj->base.size;
162
163         sg_dma_address(sg) = phys->busaddr;
164         sg_dma_len(sg) = obj->base.size;
165
166         obj->phys_handle = phys;
167
168         __i915_gem_object_set_pages(obj, st, sg->length);
169
170         return 0;
171
172 err_phys:
173         drm_pci_free(obj->base.dev, phys);
174
175         return err;
176 }
177
178 static void
179 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
180                                struct sg_table *pages)
181 {
182         __i915_gem_object_release_shmem(obj, pages, false);
183
184         if (obj->mm.dirty) {
185                 struct address_space *mapping = obj->base.filp->f_mapping;
186                 char *vaddr = obj->phys_handle->vaddr;
187                 int i;
188
189                 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
190                         struct page *page;
191                         char *dst;
192
193                         page = shmem_read_mapping_page(mapping, i);
194                         if (IS_ERR(page))
195                                 continue;
196
197                         dst = kmap_atomic(page);
198                         drm_clflush_virt_range(vaddr, PAGE_SIZE);
199                         memcpy(dst, vaddr, PAGE_SIZE);
200                         kunmap_atomic(dst);
201
202                         set_page_dirty(page);
203                         if (obj->mm.madv == I915_MADV_WILLNEED)
204                                 mark_page_accessed(page);
205                         put_page(page);
206                         vaddr += PAGE_SIZE;
207                 }
208                 obj->mm.dirty = false;
209         }
210
211         sg_free_table(pages);
212         kfree(pages);
213
214         drm_pci_free(obj->base.dev, obj->phys_handle);
215 }
216
217 static void
218 i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
219 {
220         i915_gem_object_unpin_pages(obj);
221 }
222
223 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
224         .get_pages = i915_gem_object_get_pages_phys,
225         .put_pages = i915_gem_object_put_pages_phys,
226         .release = i915_gem_object_release_phys,
227 };
228
229 int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
230 {
231         struct i915_vma *vma;
232         LIST_HEAD(still_in_list);
233         int ret;
234
235         lockdep_assert_held(&obj->base.dev->struct_mutex);
236
237         /* Closed vma are removed from the obj->vma.list - but they may
238          * still have an active binding on the object. To remove those we
239          * must wait for all rendering to the object to complete (as unbinding
240          * must anyway), and retire the requests.
241          */
242         ret = i915_gem_object_set_to_cpu_domain(obj, false);
243         if (ret)
244                 return ret;
245
246         spin_lock(&obj->vma.lock);
247         while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
248                                                        struct i915_vma,
249                                                        obj_link))) {
250                 list_move_tail(&vma->obj_link, &still_in_list);
251                 spin_unlock(&obj->vma.lock);
252
253                 ret = i915_vma_unbind(vma);
254
255                 spin_lock(&obj->vma.lock);
256         }
257         list_splice(&still_in_list, &obj->vma.list);
258         spin_unlock(&obj->vma.lock);
259
260         return ret;
261 }
262
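    /*
     * Wait on a single dma-fence, taking the i915 request path for our own
     * fences so that completed requests can also be retired.
     */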
263 static long
264 i915_gem_object_wait_fence(struct dma_fence *fence,
265                            unsigned int flags,
266                            long timeout)
267 {
268         struct i915_request *rq;
269
270         BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);
271
272         if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
273                 return timeout;
274
275         if (!dma_fence_is_i915(fence))
276                 return dma_fence_wait_timeout(fence,
277                                               flags & I915_WAIT_INTERRUPTIBLE,
278                                               timeout);
279
280         rq = to_request(fence);
281         if (i915_request_completed(rq))
282                 goto out;
283
284         timeout = i915_request_wait(rq, flags, timeout);
285
286 out:
287         if (flags & I915_WAIT_LOCKED && i915_request_completed(rq))
288                 i915_request_retire_upto(rq);
289
290         return timeout;
291 }
292
293 static long
294 i915_gem_object_wait_reservation(struct reservation_object *resv,
295                                  unsigned int flags,
296                                  long timeout)
297 {
298         unsigned int seq = __read_seqcount_begin(&resv->seq);
299         struct dma_fence *excl;
300         bool prune_fences = false;
301
302         if (flags & I915_WAIT_ALL) {
303                 struct dma_fence **shared;
304                 unsigned int count, i;
305                 int ret;
306
307                 ret = reservation_object_get_fences_rcu(resv,
308                                                         &excl, &count, &shared);
309                 if (ret)
310                         return ret;
311
312                 for (i = 0; i < count; i++) {
313                         timeout = i915_gem_object_wait_fence(shared[i],
314                                                              flags, timeout);
315                         if (timeout < 0)
316                                 break;
317
318                         dma_fence_put(shared[i]);
319                 }
320
321                 for (; i < count; i++)
322                         dma_fence_put(shared[i]);
323                 kfree(shared);
324
325                 /*
326                  * If both shared fences and an exclusive fence exist,
327                  * then by construction the shared fences must be later
328                  * than the exclusive fence. If we successfully wait for
329                  * all the shared fences, we know that the exclusive fence
330                  * must also be signaled. If all the shared fences are
331                  * signaled, we can prune the array and recover the
332                  * floating references on the fences/requests.
333                  */
334                 prune_fences = count && timeout >= 0;
335         } else {
336                 excl = reservation_object_get_excl_rcu(resv);
337         }
338
339         if (excl && timeout >= 0)
340                 timeout = i915_gem_object_wait_fence(excl, flags, timeout);
341
342         dma_fence_put(excl);
343
344         /*
345          * Opportunistically prune the fences iff we know they have *all* been
346          * signaled and that the reservation object has not been changed (i.e.
347          * no new fences have been added).
348          */
349         if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) {
350                 if (reservation_object_trylock(resv)) {
351                         if (!__read_seqcount_retry(&resv->seq, seq))
352                                 reservation_object_add_excl_fence(resv, NULL);
353                         reservation_object_unlock(resv);
354                 }
355         }
356
357         return timeout;
358 }
359
360 static void __fence_set_priority(struct dma_fence *fence,
361                                  const struct i915_sched_attr *attr)
362 {
363         struct i915_request *rq;
364         struct intel_engine_cs *engine;
365
366         if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
367                 return;
368
369         rq = to_request(fence);
370         engine = rq->engine;
371
372         local_bh_disable();
373         rcu_read_lock(); /* RCU serialisation for set-wedged protection */
374         if (engine->schedule)
375                 engine->schedule(rq, attr);
376         rcu_read_unlock();
377         local_bh_enable(); /* kick the tasklets if queues were reprioritised */
378 }
379
380 static void fence_set_priority(struct dma_fence *fence,
381                                const struct i915_sched_attr *attr)
382 {
383         /* Recurse once into a fence-array */
384         if (dma_fence_is_array(fence)) {
385                 struct dma_fence_array *array = to_dma_fence_array(fence);
386                 int i;
387
388                 for (i = 0; i < array->num_fences; i++)
389                         __fence_set_priority(array->fences[i], attr);
390         } else {
391                 __fence_set_priority(fence, attr);
392         }
393 }
394
395 int
396 i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
397                               unsigned int flags,
398                               const struct i915_sched_attr *attr)
399 {
400         struct dma_fence *excl;
401
402         if (flags & I915_WAIT_ALL) {
403                 struct dma_fence **shared;
404                 unsigned int count, i;
405                 int ret;
406
407                 ret = reservation_object_get_fences_rcu(obj->resv,
408                                                         &excl, &count, &shared);
409                 if (ret)
410                         return ret;
411
412                 for (i = 0; i < count; i++) {
413                         fence_set_priority(shared[i], attr);
414                         dma_fence_put(shared[i]);
415                 }
416
417                 kfree(shared);
418         } else {
419                 excl = reservation_object_get_excl_rcu(obj->resv);
420         }
421
422         if (excl) {
423                 fence_set_priority(excl, attr);
424                 dma_fence_put(excl);
425         }
426         return 0;
427 }
428
429 /**
430  * Waits for rendering to the object to be completed
431  * @obj: i915 gem object
432  * @flags: how to wait (under a lock, for all rendering or just for writes etc)
433  * @timeout: how long to wait
434  */
435 int
436 i915_gem_object_wait(struct drm_i915_gem_object *obj,
437                      unsigned int flags,
438                      long timeout)
439 {
440         might_sleep();
441         GEM_BUG_ON(timeout < 0);
442
443         timeout = i915_gem_object_wait_reservation(obj->resv, flags, timeout);
444         return timeout < 0 ? timeout : 0;
445 }
446
447 static int
448 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
449                      struct drm_i915_gem_pwrite *args,
450                      struct drm_file *file)
451 {
452         void *vaddr = obj->phys_handle->vaddr + args->offset;
453         char __user *user_data = u64_to_user_ptr(args->data_ptr);
454
455         /* We manually control the domain here and pretend that it
456          * remains coherent i.e. in the GTT domain, like shmem_pwrite.
457          */
458         intel_fb_obj_invalidate(obj, ORIGIN_CPU);
459         if (copy_from_user(vaddr, user_data, args->size))
460                 return -EFAULT;
461
462         drm_clflush_virt_range(vaddr, args->size);
463         i915_gem_chipset_flush(to_i915(obj->base.dev));
464
465         intel_fb_obj_flush(obj, ORIGIN_CPU);
466         return 0;
467 }
468
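    /*
     * Common helper for the create and dumb_create ioctls: round the
     * requested size up to a page, back it with a shmem object and return
     * a new handle to it.
     */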
469 static int
470 i915_gem_create(struct drm_file *file,
471                 struct drm_i915_private *dev_priv,
472                 u64 *size_p,
473                 u32 *handle_p)
474 {
475         struct drm_i915_gem_object *obj;
476         u32 handle;
477         u64 size;
478         int ret;
479
480         size = round_up(*size_p, PAGE_SIZE);
481         if (size == 0)
482                 return -EINVAL;
483
484         /* Allocate the new object */
485         obj = i915_gem_object_create_shmem(dev_priv, size);
486         if (IS_ERR(obj))
487                 return PTR_ERR(obj);
488
489         ret = drm_gem_handle_create(file, &obj->base, &handle);
490         /* drop reference from allocate - handle holds it now */
491         i915_gem_object_put(obj);
492         if (ret)
493                 return ret;
494
495         *handle_p = handle;
496         *size_p = size;
497         return 0;
498 }
499
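    /**
     * i915_gem_dumb_create - create a simple linear buffer for KMS
     * @file: drm file
     * @dev: drm device
     * @args: dumb-buffer parameters (width/height/bpp in, pitch/size/handle out)
     *
     * The pitch is aligned to 64 bytes, and additionally to a page when it
     * exceeds the maximum stride a display plane can scan out, so that the
     * buffer can still be remapped for scanout.
     */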
500 int
501 i915_gem_dumb_create(struct drm_file *file,
502                      struct drm_device *dev,
503                      struct drm_mode_create_dumb *args)
504 {
505         int cpp = DIV_ROUND_UP(args->bpp, 8);
506         u32 format;
507
508         switch (cpp) {
509         case 1:
510                 format = DRM_FORMAT_C8;
511                 break;
512         case 2:
513                 format = DRM_FORMAT_RGB565;
514                 break;
515         case 4:
516                 format = DRM_FORMAT_XRGB8888;
517                 break;
518         default:
519                 return -EINVAL;
520         }
521
522         /* have to work out size/pitch and return them */
523         args->pitch = ALIGN(args->width * cpp, 64);
524
525         /* align stride to page size so that we can remap */
526         if (args->pitch > intel_plane_fb_max_stride(to_i915(dev), format,
527                                                     DRM_FORMAT_MOD_LINEAR))
528                 args->pitch = ALIGN(args->pitch, 4096);
529
530         args->size = args->pitch * args->height;
531         return i915_gem_create(file, to_i915(dev),
532                                &args->size, &args->handle);
533 }
534
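    /*
     * GPU writes into a cacheable buffer (anything other than uncached or
     * write-through) leave dirty cachelines behind; the caller marks the
     * object as cache_dirty so they are flushed before any uncached access.
     */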
535 static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
536 {
537         return !(obj->cache_level == I915_CACHE_NONE ||
538                  obj->cache_level == I915_CACHE_WT);
539 }
540
541 /**
542  * Creates a new mm object and returns a handle to it.
543  * @dev: drm device pointer
544  * @data: ioctl data blob
545  * @file: drm file pointer
546  */
547 int
548 i915_gem_create_ioctl(struct drm_device *dev, void *data,
549                       struct drm_file *file)
550 {
551         struct drm_i915_private *dev_priv = to_i915(dev);
552         struct drm_i915_gem_create *args = data;
553
554         i915_gem_flush_free_objects(dev_priv);
555
556         return i915_gem_create(file, dev_priv,
557                                &args->size, &args->handle);
558 }
559
560 static inline enum fb_op_origin
561 fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
562 {
563         return (domain == I915_GEM_DOMAIN_GTT ?
564                 obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
565 }
566
567 void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
568 {
569         intel_wakeref_t wakeref;
570
571         /*
572          * No actual flushing is required for the GTT write domain for reads
573          * from the GTT domain. Writes to it "immediately" go to main memory
574          * as far as we know, so there's no chipset flush. It also doesn't
575          * land in the GPU render cache.
576          *
577          * However, we do have to enforce the order so that all writes through
578          * the GTT land before any writes to the device, such as updates to
579          * the GATT itself.
580          *
581          * We also have to wait a bit for the writes to land from the GTT.
582          * An uncached read (i.e. mmio) seems to be ideal for the round-trip
583          * timing. This issue has only been observed when switching quickly
584          * between GTT writes and CPU reads from inside the kernel on recent hw,
585          * and it appears to only affect discrete GTT blocks (i.e. on LLC
586          * system agents we cannot reproduce this behaviour, until Cannonlake
587          * that was!).
588          */
589
590         wmb();
591
592         if (INTEL_INFO(dev_priv)->has_coherent_ggtt)
593                 return;
594
595         i915_gem_chipset_flush(dev_priv);
596
597         with_intel_runtime_pm(dev_priv, wakeref) {
598                 spin_lock_irq(&dev_priv->uncore.lock);
599
600                 POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));
601
602                 spin_unlock_irq(&dev_priv->uncore.lock);
603         }
604 }
605
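    /*
     * Make writes pending in the object's current write domain visible
     * before it is handed over: GGTT writes are flushed via
     * i915_gem_flush_ggtt_writes(), WC writes with a memory barrier and
     * CPU writes with a clflush; GPU (render) writes may simply mark the
     * object's cache as dirty for a later flush.
     */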
606 static void
607 flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
608 {
609         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
610         struct i915_vma *vma;
611
612         if (!(obj->write_domain & flush_domains))
613                 return;
614
615         switch (obj->write_domain) {
616         case I915_GEM_DOMAIN_GTT:
617                 i915_gem_flush_ggtt_writes(dev_priv);
618
619                 intel_fb_obj_flush(obj,
620                                    fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
621
622                 for_each_ggtt_vma(vma, obj) {
623                         if (vma->iomap)
624                                 continue;
625
626                         i915_vma_unset_ggtt_write(vma);
627                 }
628                 break;
629
630         case I915_GEM_DOMAIN_WC:
631                 wmb();
632                 break;
633
634         case I915_GEM_DOMAIN_CPU:
635                 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
636                 break;
637
638         case I915_GEM_DOMAIN_RENDER:
639                 if (gpu_write_needs_clflush(obj))
640                         obj->cache_dirty = true;
641                 break;
642         }
643
644         obj->write_domain = 0;
645 }
646
647 /*
648  * Pins the specified object's pages and synchronizes the object with
649  * GPU accesses. Sets needs_clflush to non-zero if the caller should
650  * flush the object from the CPU cache.
651  */
652 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
653                                     unsigned int *needs_clflush)
654 {
655         int ret;
656
657         lockdep_assert_held(&obj->base.dev->struct_mutex);
658
659         *needs_clflush = 0;
660         if (!i915_gem_object_has_struct_page(obj))
661                 return -ENODEV;
662
663         ret = i915_gem_object_wait(obj,
664                                    I915_WAIT_INTERRUPTIBLE |
665                                    I915_WAIT_LOCKED,
666                                    MAX_SCHEDULE_TIMEOUT);
667         if (ret)
668                 return ret;
669
670         ret = i915_gem_object_pin_pages(obj);
671         if (ret)
672                 return ret;
673
674         if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
675             !static_cpu_has(X86_FEATURE_CLFLUSH)) {
676                 ret = i915_gem_object_set_to_cpu_domain(obj, false);
677                 if (ret)
678                         goto err_unpin;
679                 else
680                         goto out;
681         }
682
683         flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
684
685         /* If we're not in the cpu read domain, set ourself into the gtt
686          * read domain and manually flush cachelines (if required). This
687          * optimizes for the case when the gpu will dirty the data
688          * anyway again before the next pread happens.
689          */
690         if (!obj->cache_dirty &&
691             !(obj->read_domains & I915_GEM_DOMAIN_CPU))
692                 *needs_clflush = CLFLUSH_BEFORE;
693
694 out:
695         /* return with the pages pinned */
696         return 0;
697
698 err_unpin:
699         i915_gem_object_unpin_pages(obj);
700         return ret;
701 }
702
703 int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
704                                      unsigned int *needs_clflush)
705 {
706         int ret;
707
708         lockdep_assert_held(&obj->base.dev->struct_mutex);
709
710         *needs_clflush = 0;
711         if (!i915_gem_object_has_struct_page(obj))
712                 return -ENODEV;
713
714         ret = i915_gem_object_wait(obj,
715                                    I915_WAIT_INTERRUPTIBLE |
716                                    I915_WAIT_LOCKED |
717                                    I915_WAIT_ALL,
718                                    MAX_SCHEDULE_TIMEOUT);
719         if (ret)
720                 return ret;
721
722         ret = i915_gem_object_pin_pages(obj);
723         if (ret)
724                 return ret;
725
726         if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
727             !static_cpu_has(X86_FEATURE_CLFLUSH)) {
728                 ret = i915_gem_object_set_to_cpu_domain(obj, true);
729                 if (ret)
730                         goto err_unpin;
731                 else
732                         goto out;
733         }
734
735         flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
736
737         /* If we're not in the cpu write domain, set ourself into the
738          * gtt write domain and manually flush cachelines (as required).
739          * This optimizes for the case when the gpu will use the data
740          * right away and we therefore have to clflush anyway.
741          */
742         if (!obj->cache_dirty) {
743                 *needs_clflush |= CLFLUSH_AFTER;
744
745                 /*
746                  * Same trick applies to invalidate partially written
747                  * cachelines read before writing.
748                  */
749                 if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
750                         *needs_clflush |= CLFLUSH_BEFORE;
751         }
752
753 out:
754         intel_fb_obj_invalidate(obj, ORIGIN_CPU);
755         obj->mm.dirty = true;
756         /* return with the pages pinned */
757         return 0;
758
759 err_unpin:
760         i915_gem_object_unpin_pages(obj);
761         return ret;
762 }
763
764 static int
765 shmem_pread(struct page *page, int offset, int len, char __user *user_data,
766             bool needs_clflush)
767 {
768         char *vaddr;
769         int ret;
770
771         vaddr = kmap(page);
772
773         if (needs_clflush)
774                 drm_clflush_virt_range(vaddr + offset, len);
775
776         ret = __copy_to_user(user_data, vaddr + offset, len);
777
778         kunmap(page);
779
780         return ret ? -EFAULT : 0;
781 }
782
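    /*
     * Slow-path pread through the shmem backing store: copy out page by
     * page via kmap(), clflushing first when the object is not coherent
     * with the CPU cache.
     */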
783 static int
784 i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
785                      struct drm_i915_gem_pread *args)
786 {
787         char __user *user_data;
788         u64 remain;
789         unsigned int needs_clflush;
790         unsigned int idx, offset;
791         int ret;
792
793         ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
794         if (ret)
795                 return ret;
796
797         ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
798         mutex_unlock(&obj->base.dev->struct_mutex);
799         if (ret)
800                 return ret;
801
802         remain = args->size;
803         user_data = u64_to_user_ptr(args->data_ptr);
804         offset = offset_in_page(args->offset);
805         for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
806                 struct page *page = i915_gem_object_get_page(obj, idx);
807                 unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);
808
809                 ret = shmem_pread(page, offset, length, user_data,
810                                   needs_clflush);
811                 if (ret)
812                         break;
813
814                 remain -= length;
815                 user_data += length;
816                 offset = 0;
817         }
818
819         i915_gem_obj_finish_shmem_access(obj);
820         return ret;
821 }
822
823 static inline bool
824 gtt_user_read(struct io_mapping *mapping,
825               loff_t base, int offset,
826               char __user *user_data, int length)
827 {
828         void __iomem *vaddr;
829         unsigned long unwritten;
830
831         /* We can use the cpu mem copy function because this is X86. */
832         vaddr = io_mapping_map_atomic_wc(mapping, base);
833         unwritten = __copy_to_user_inatomic(user_data,
834                                             (void __force *)vaddr + offset,
835                                             length);
836         io_mapping_unmap_atomic(vaddr);
837         if (unwritten) {
838                 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
839                 unwritten = copy_to_user(user_data,
840                                          (void __force *)vaddr + offset,
841                                          length);
842                 io_mapping_unmap(vaddr);
843         }
844         return unwritten;
845 }
846
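    /*
     * Fallback pread through a GGTT mapping: pin the object into the
     * mappable aperture if possible, otherwise bind each page in turn into
     * a scratch GGTT node and read it through the uncached iomap.
     */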
847 static int
848 i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
849                    const struct drm_i915_gem_pread *args)
850 {
851         struct drm_i915_private *i915 = to_i915(obj->base.dev);
852         struct i915_ggtt *ggtt = &i915->ggtt;
853         intel_wakeref_t wakeref;
854         struct drm_mm_node node;
855         struct i915_vma *vma;
856         void __user *user_data;
857         u64 remain, offset;
858         int ret;
859
860         ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
861         if (ret)
862                 return ret;
863
864         wakeref = intel_runtime_pm_get(i915);
865         vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
866                                        PIN_MAPPABLE |
867                                        PIN_NONFAULT |
868                                        PIN_NONBLOCK);
869         if (!IS_ERR(vma)) {
870                 node.start = i915_ggtt_offset(vma);
871                 node.allocated = false;
872                 ret = i915_vma_put_fence(vma);
873                 if (ret) {
874                         i915_vma_unpin(vma);
875                         vma = ERR_PTR(ret);
876                 }
877         }
878         if (IS_ERR(vma)) {
879                 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
880                 if (ret)
881                         goto out_unlock;
882                 GEM_BUG_ON(!node.allocated);
883         }
884
885         ret = i915_gem_object_set_to_gtt_domain(obj, false);
886         if (ret)
887                 goto out_unpin;
888
889         mutex_unlock(&i915->drm.struct_mutex);
890
891         user_data = u64_to_user_ptr(args->data_ptr);
892         remain = args->size;
893         offset = args->offset;
894
895         while (remain > 0) {
896                 /* Operation in this page
897                  *
898                  * page_base = page offset within aperture
899                  * page_offset = offset within page
900                  * page_length = bytes to copy for this page
901                  */
902                 u32 page_base = node.start;
903                 unsigned page_offset = offset_in_page(offset);
904                 unsigned page_length = PAGE_SIZE - page_offset;
905                 page_length = remain < page_length ? remain : page_length;
906                 if (node.allocated) {
907                         wmb();
908                         ggtt->vm.insert_page(&ggtt->vm,
909                                              i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
910                                              node.start, I915_CACHE_NONE, 0);
911                         wmb();
912                 } else {
913                         page_base += offset & PAGE_MASK;
914                 }
915
916                 if (gtt_user_read(&ggtt->iomap, page_base, page_offset,
917                                   user_data, page_length)) {
918                         ret = -EFAULT;
919                         break;
920                 }
921
922                 remain -= page_length;
923                 user_data += page_length;
924                 offset += page_length;
925         }
926
927         mutex_lock(&i915->drm.struct_mutex);
928 out_unpin:
929         if (node.allocated) {
930                 wmb();
931                 ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
932                 remove_mappable_node(&node);
933         } else {
934                 i915_vma_unpin(vma);
935         }
936 out_unlock:
937         intel_runtime_pm_put(i915, wakeref);
938         mutex_unlock(&i915->drm.struct_mutex);
939
940         return ret;
941 }
942
943 /**
944  * Reads data from the object referenced by handle.
945  * @dev: drm device pointer
946  * @data: ioctl data blob
947  * @file: drm file pointer
948  *
949  * On error, the contents of *data are undefined.
950  */
951 int
952 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
953                      struct drm_file *file)
954 {
955         struct drm_i915_gem_pread *args = data;
956         struct drm_i915_gem_object *obj;
957         int ret;
958
959         if (args->size == 0)
960                 return 0;
961
962         if (!access_ok(u64_to_user_ptr(args->data_ptr),
963                        args->size))
964                 return -EFAULT;
965
966         obj = i915_gem_object_lookup(file, args->handle);
967         if (!obj)
968                 return -ENOENT;
969
970         /* Bounds check source.  */
971         if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
972                 ret = -EINVAL;
973                 goto out;
974         }
975
976         trace_i915_gem_object_pread(obj, args->offset, args->size);
977
978         ret = i915_gem_object_wait(obj,
979                                    I915_WAIT_INTERRUPTIBLE,
980                                    MAX_SCHEDULE_TIMEOUT);
981         if (ret)
982                 goto out;
983
984         ret = i915_gem_object_pin_pages(obj);
985         if (ret)
986                 goto out;
987
988         ret = i915_gem_shmem_pread(obj, args);
989         if (ret == -EFAULT || ret == -ENODEV)
990                 ret = i915_gem_gtt_pread(obj, args);
991
992         i915_gem_object_unpin_pages(obj);
993 out:
994         i915_gem_object_put(obj);
995         return ret;
996 }
997
998 /* This is the fast write path which cannot handle
999  * page faults in the source data
1000  */
1001
1002 static inline bool
1003 ggtt_write(struct io_mapping *mapping,
1004            loff_t base, int offset,
1005            char __user *user_data, int length)
1006 {
1007         void __iomem *vaddr;
1008         unsigned long unwritten;
1009
1010         /* We can use the cpu mem copy function because this is X86. */
1011         vaddr = io_mapping_map_atomic_wc(mapping, base);
1012         unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset,
1013                                                       user_data, length);
1014         io_mapping_unmap_atomic(vaddr);
1015         if (unwritten) {
1016                 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
1017                 unwritten = copy_from_user((void __force *)vaddr + offset,
1018                                            user_data, length);
1019                 io_mapping_unmap(vaddr);
1020         }
1021
1022         return unwritten;
1023 }
1024
1025 /**
1026  * This is the fast pwrite path, where we copy the data directly from the
1027  * user into the GTT, uncached.
1028  * @obj: i915 GEM object
1029  * @args: pwrite arguments structure
1030  */
1031 static int
1032 i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
1033                          const struct drm_i915_gem_pwrite *args)
1034 {
1035         struct drm_i915_private *i915 = to_i915(obj->base.dev);
1036         struct i915_ggtt *ggtt = &i915->ggtt;
1037         intel_wakeref_t wakeref;
1038         struct drm_mm_node node;
1039         struct i915_vma *vma;
1040         u64 remain, offset;
1041         void __user *user_data;
1042         int ret;
1043
1044         ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
1045         if (ret)
1046                 return ret;
1047
1048         if (i915_gem_object_has_struct_page(obj)) {
1049                 /*
1050                  * Avoid waking the device up if we can fallback, as
1051                  * waking/resuming is very slow (worst-case 10-100 ms
1052                  * depending on PCI sleeps and our own resume time).
1053                  * This easily dwarfs any performance advantage from
1054                  * using the cache bypass of indirect GGTT access.
1055                  */
1056                 wakeref = intel_runtime_pm_get_if_in_use(i915);
1057                 if (!wakeref) {
1058                         ret = -EFAULT;
1059                         goto out_unlock;
1060                 }
1061         } else {
1062                 /* No backing pages, no fallback, we must force GGTT access */
1063                 wakeref = intel_runtime_pm_get(i915);
1064         }
1065
1066         vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1067                                        PIN_MAPPABLE |
1068                                        PIN_NONFAULT |
1069                                        PIN_NONBLOCK);
1070         if (!IS_ERR(vma)) {
1071                 node.start = i915_ggtt_offset(vma);
1072                 node.allocated = false;
1073                 ret = i915_vma_put_fence(vma);
1074                 if (ret) {
1075                         i915_vma_unpin(vma);
1076                         vma = ERR_PTR(ret);
1077                 }
1078         }
1079         if (IS_ERR(vma)) {
1080                 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
1081                 if (ret)
1082                         goto out_rpm;
1083                 GEM_BUG_ON(!node.allocated);
1084         }
1085
1086         ret = i915_gem_object_set_to_gtt_domain(obj, true);
1087         if (ret)
1088                 goto out_unpin;
1089
1090         mutex_unlock(&i915->drm.struct_mutex);
1091
1092         intel_fb_obj_invalidate(obj, ORIGIN_CPU);
1093
1094         user_data = u64_to_user_ptr(args->data_ptr);
1095         offset = args->offset;
1096         remain = args->size;
1097         while (remain) {
1098                 /* Operation in this page
1099                  *
1100                  * page_base = page offset within aperture
1101                  * page_offset = offset within page
1102                  * page_length = bytes to copy for this page
1103                  */
1104                 u32 page_base = node.start;
1105                 unsigned int page_offset = offset_in_page(offset);
1106                 unsigned int page_length = PAGE_SIZE - page_offset;
1107                 page_length = remain < page_length ? remain : page_length;
1108                 if (node.allocated) {
1109                         wmb(); /* flush the write before we modify the GGTT */
1110                         ggtt->vm.insert_page(&ggtt->vm,
1111                                              i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
1112                                              node.start, I915_CACHE_NONE, 0);
1113                         wmb(); /* flush modifications to the GGTT (insert_page) */
1114                 } else {
1115                         page_base += offset & PAGE_MASK;
1116                 }
1117                 /* If we get a fault while copying data, then (presumably) our
1118                  * source page isn't available.  Return the error and we'll
1119                  * retry in the slow path.
1120                  * If the object is non-shmem backed, we retry again with the
1121          * path that handles page faults.
1122                  */
1123                 if (ggtt_write(&ggtt->iomap, page_base, page_offset,
1124                                user_data, page_length)) {
1125                         ret = -EFAULT;
1126                         break;
1127                 }
1128
1129                 remain -= page_length;
1130                 user_data += page_length;
1131                 offset += page_length;
1132         }
1133         intel_fb_obj_flush(obj, ORIGIN_CPU);
1134
1135         mutex_lock(&i915->drm.struct_mutex);
1136 out_unpin:
1137         if (node.allocated) {
1138                 wmb();
1139                 ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
1140                 remove_mappable_node(&node);
1141         } else {
1142                 i915_vma_unpin(vma);
1143         }
1144 out_rpm:
1145         intel_runtime_pm_put(i915, wakeref);
1146 out_unlock:
1147         mutex_unlock(&i915->drm.struct_mutex);
1148         return ret;
1149 }
1150
1151 /* Per-page copy function for the shmem pwrite fastpath.
1152  * Flushes invalid cachelines before writing to the target if
1153  * needs_clflush_before is set and flushes out any written cachelines after
1154  * writing if needs_clflush_after is set.
1155  */
1156 static int
1157 shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
1158              bool needs_clflush_before,
1159              bool needs_clflush_after)
1160 {
1161         char *vaddr;
1162         int ret;
1163
1164         vaddr = kmap(page);
1165
1166         if (needs_clflush_before)
1167                 drm_clflush_virt_range(vaddr + offset, len);
1168
1169         ret = __copy_from_user(vaddr + offset, user_data, len);
1170         if (!ret && needs_clflush_after)
1171                 drm_clflush_virt_range(vaddr + offset, len);
1172
1173         kunmap(page);
1174
1175         return ret ? -EFAULT : 0;
1176 }
1177
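     /*
      * Slow-path pwrite into the shmem backing store, mirroring
      * i915_gem_shmem_pread(): copy in page by page with any clflushes
      * required by the cache domain before and after each write.
      */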
1178 static int
1179 i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
1180                       const struct drm_i915_gem_pwrite *args)
1181 {
1182         struct drm_i915_private *i915 = to_i915(obj->base.dev);
1183         void __user *user_data;
1184         u64 remain;
1185         unsigned int partial_cacheline_write;
1186         unsigned int needs_clflush;
1187         unsigned int offset, idx;
1188         int ret;
1189
1190         ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
1191         if (ret)
1192                 return ret;
1193
1194         ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
1195         mutex_unlock(&i915->drm.struct_mutex);
1196         if (ret)
1197                 return ret;
1198
1199         /* If we don't overwrite a cacheline completely we need to be
1200          * careful to have up-to-date data by first clflushing. Don't
1201          * overcomplicate things and flush the entire write.
1202          */
1203         partial_cacheline_write = 0;
1204         if (needs_clflush & CLFLUSH_BEFORE)
1205                 partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;
1206
1207         user_data = u64_to_user_ptr(args->data_ptr);
1208         remain = args->size;
1209         offset = offset_in_page(args->offset);
1210         for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
1211                 struct page *page = i915_gem_object_get_page(obj, idx);
1212                 unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);
1213
1214                 ret = shmem_pwrite(page, offset, length, user_data,
1215                                    (offset | length) & partial_cacheline_write,
1216                                    needs_clflush & CLFLUSH_AFTER);
1217                 if (ret)
1218                         break;
1219
1220                 remain -= length;
1221                 user_data += length;
1222                 offset = 0;
1223         }
1224
1225         intel_fb_obj_flush(obj, ORIGIN_CPU);
1226         i915_gem_obj_finish_shmem_access(obj);
1227         return ret;
1228 }
1229
1230 /**
1231  * Writes data to the object referenced by handle.
1232  * @dev: drm device
1233  * @data: ioctl data blob
1234  * @file: drm file
1235  *
1236  * On error, the contents of the buffer that were to be modified are undefined.
1237  */
1238 int
1239 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
1240                       struct drm_file *file)
1241 {
1242         struct drm_i915_gem_pwrite *args = data;
1243         struct drm_i915_gem_object *obj;
1244         int ret;
1245
1246         if (args->size == 0)
1247                 return 0;
1248
1249         if (!access_ok(u64_to_user_ptr(args->data_ptr), args->size))
1250                 return -EFAULT;
1251
1252         obj = i915_gem_object_lookup(file, args->handle);
1253         if (!obj)
1254                 return -ENOENT;
1255
1256         /* Bounds check destination. */
1257         if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
1258                 ret = -EINVAL;
1259                 goto err;
1260         }
1261
1262         /* Writes not allowed into this read-only object */
1263         if (i915_gem_object_is_readonly(obj)) {
1264                 ret = -EINVAL;
1265                 goto err;
1266         }
1267
1268         trace_i915_gem_object_pwrite(obj, args->offset, args->size);
1269
1270         ret = -ENODEV;
1271         if (obj->ops->pwrite)
1272                 ret = obj->ops->pwrite(obj, args);
1273         if (ret != -ENODEV)
1274                 goto err;
1275
1276         ret = i915_gem_object_wait(obj,
1277                                    I915_WAIT_INTERRUPTIBLE |
1278                                    I915_WAIT_ALL,
1279                                    MAX_SCHEDULE_TIMEOUT);
1280         if (ret)
1281                 goto err;
1282
1283         ret = i915_gem_object_pin_pages(obj);
1284         if (ret)
1285                 goto err;
1286
1287         ret = -EFAULT;
1288         /* We can only do the GTT pwrite on untiled buffers, as otherwise
1289          * it would end up going through the fenced access, and we'll get
1290          * different detiling behavior between reading and writing.
1291          * pread/pwrite currently are reading and writing from the CPU
1292          * perspective, requiring manual detiling by the client.
1293          */
1294         if (!i915_gem_object_has_struct_page(obj) ||
1295             cpu_write_needs_clflush(obj))
1296                 /* Note that the gtt paths might fail with non-page-backed user
1297                  * pointers (e.g. gtt mappings when moving data between
1298                  * textures). Fallback to the shmem path in that case.
1299                  */
1300                 ret = i915_gem_gtt_pwrite_fast(obj, args);
1301
1302         if (ret == -EFAULT || ret == -ENOSPC) {
1303                 if (obj->phys_handle)
1304                         ret = i915_gem_phys_pwrite(obj, args, file);
1305                 else
1306                         ret = i915_gem_shmem_pwrite(obj, args);
1307         }
1308
1309         i915_gem_object_unpin_pages(obj);
1310 err:
1311         i915_gem_object_put(obj);
1312         return ret;
1313 }
1314
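     /*
      * Bump the object and its bound GGTT vma to the tail of their LRU
      * lists so they are the last candidates considered for eviction and
      * shrinking after this CPU access.
      */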
1315 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
1316 {
1317         struct drm_i915_private *i915 = to_i915(obj->base.dev);
1318         struct list_head *list;
1319         struct i915_vma *vma;
1320
1321         GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
1322
1323         mutex_lock(&i915->ggtt.vm.mutex);
1324         for_each_ggtt_vma(vma, obj) {
1325                 if (!drm_mm_node_allocated(&vma->node))
1326                         continue;
1327
1328                 list_move_tail(&vma->vm_link, &vma->vm->bound_list);
1329         }
1330         mutex_unlock(&i915->ggtt.vm.mutex);
1331
1332         spin_lock(&i915->mm.obj_lock);
1333         list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
1334         list_move_tail(&obj->mm.link, list);
1335         spin_unlock(&i915->mm.obj_lock);
1336 }
1337
1338 /**
1339  * Called when user space prepares to use an object with the CPU, either
1340  * through the mmap ioctl's mapping or a GTT mapping.
1341  * @dev: drm device
1342  * @data: ioctl data blob
1343  * @file: drm file
1344  */
1345 int
1346 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1347                           struct drm_file *file)
1348 {
1349         struct drm_i915_gem_set_domain *args = data;
1350         struct drm_i915_gem_object *obj;
1351         u32 read_domains = args->read_domains;
1352         u32 write_domain = args->write_domain;
1353         int err;
1354
1355         /* Only handle setting domains to types used by the CPU. */
1356         if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
1357                 return -EINVAL;
1358
1359         /*
1360          * Having something in the write domain implies it's in the read
1361          * domain, and only that read domain.  Enforce that in the request.
1362          */
1363         if (write_domain && read_domains != write_domain)
1364                 return -EINVAL;
1365
1366         if (!read_domains)
1367                 return 0;
1368
1369         obj = i915_gem_object_lookup(file, args->handle);
1370         if (!obj)
1371                 return -ENOENT;
1372
1373         /*
1374          * Already in the desired write domain? Nothing for us to do!
1375          *
1376          * We apply a little bit of cunning here to catch a broader set of
1377          * no-ops. If obj->write_domain is set, we must be in the same
1378          * obj->read_domains, and only that domain. Therefore, if that
1379          * obj->write_domain matches the request read_domains, we are
1380          * already in the same read/write domain and can skip the operation,
1381          * without having to further check the requested write_domain.
1382          */
1383         if (READ_ONCE(obj->write_domain) == read_domains) {
1384                 err = 0;
1385                 goto out;
1386         }
1387
1388         /*
1389          * Try to flush the object off the GPU without holding the lock.
1390          * We will repeat the flush holding the lock in the normal manner
1391          * to catch cases where we are gazumped.
1392          */
1393         err = i915_gem_object_wait(obj,
1394                                    I915_WAIT_INTERRUPTIBLE |
1395                                    I915_WAIT_PRIORITY |
1396                                    (write_domain ? I915_WAIT_ALL : 0),
1397                                    MAX_SCHEDULE_TIMEOUT);
1398         if (err)
1399                 goto out;
1400
1401         /*
1402          * Proxy objects do not control access to the backing storage, ergo
1403          * they cannot be used as a means to manipulate the cache domain
1404          * tracking for that backing storage. The proxy object is always
1405          * considered to be outside of any cache domain.
1406          */
1407         if (i915_gem_object_is_proxy(obj)) {
1408                 err = -ENXIO;
1409                 goto out;
1410         }
1411
1412         /*
1413          * Flush and acquire obj->pages so that we are coherent through
1414          * direct access in memory with previous cached writes through
1415          * shmemfs and that our cache domain tracking remains valid.
1416          * For example, if the obj->filp was moved to swap without us
1417          * being notified and releasing the pages, we would mistakenly
1418          * continue to assume that the obj remained out of the CPU cached
1419          * domain.
1420          */
1421         err = i915_gem_object_pin_pages(obj);
1422         if (err)
1423                 goto out;
1424
1425         err = i915_mutex_lock_interruptible(dev);
1426         if (err)
1427                 goto out_unpin;
1428
1429         if (read_domains & I915_GEM_DOMAIN_WC)
1430                 err = i915_gem_object_set_to_wc_domain(obj, write_domain);
1431         else if (read_domains & I915_GEM_DOMAIN_GTT)
1432                 err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
1433         else
1434                 err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
1435
1436         /* And bump the LRU for this access */
1437         i915_gem_object_bump_inactive_ggtt(obj);
1438
1439         mutex_unlock(&dev->struct_mutex);
1440
1441         if (write_domain != 0)
1442                 intel_fb_obj_invalidate(obj,
1443                                         fb_write_origin(obj, write_domain));
1444
1445 out_unpin:
1446         i915_gem_object_unpin_pages(obj);
1447 out:
1448         i915_gem_object_put(obj);
1449         return err;
1450 }
1451
1452 /**
1453  * Called when user space has done writes to this buffer
1454  * @dev: drm device
1455  * @data: ioctl data blob
1456  * @file: drm file
1457  */
1458 int
1459 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1460                          struct drm_file *file)
1461 {
1462         struct drm_i915_gem_sw_finish *args = data;
1463         struct drm_i915_gem_object *obj;
1464
1465         obj = i915_gem_object_lookup(file, args->handle);
1466         if (!obj)
1467                 return -ENOENT;
1468
1469         /*
1470          * Proxy objects are barred from CPU access, so there is no
1471          * need to ban sw_finish as it is a nop.
1472          */
1473
1474         /* Pinned buffers may be scanout, so flush the cache */
1475         i915_gem_object_flush_if_display(obj);
1476         i915_gem_object_put(obj);
1477
1478         return 0;
1479 }
1480
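     /*
      * Check that the vma returned by find_vma() is exactly the mapping we
      * just created with vm_mmap(): same backing file, start and size.
      */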
1481 static inline bool
1482 __vma_matches(struct vm_area_struct *vma, struct file *filp,
1483               unsigned long addr, unsigned long size)
1484 {
1485         if (vma->vm_file != filp)
1486                 return false;
1487
1488         return vma->vm_start == addr &&
1489                (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size);
1490 }
1491
1492 /**
1493  * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
1494  *                       it is mapped to.
1495  * @dev: drm device
1496  * @data: ioctl data blob
1497  * @file: drm file
1498  *
1499  * While the mapping holds a reference on the contents of the object, it doesn't
1500  * imply a ref on the object itself.
1501  *
1502  * IMPORTANT:
1503  *
1504  * DRM driver writers who look at this function as an example for how to do GEM
1505  * mmap support, please don't implement mmap support like here. The modern way
1506  * to implement DRM mmap support is with an mmap offset ioctl (like
1507  * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
1508  * That way debug tooling like valgrind will understand what's going on, hiding
1509  * the mmap call in a driver private ioctl will break that. The i915 driver only
1510  * does cpu mmaps this way because we didn't know better.
1511  */
1512 int
1513 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1514                     struct drm_file *file)
1515 {
1516         struct drm_i915_gem_mmap *args = data;
1517         struct drm_i915_gem_object *obj;
1518         unsigned long addr;
1519
1520         if (args->flags & ~(I915_MMAP_WC))
1521                 return -EINVAL;
1522
1523         if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
1524                 return -ENODEV;
1525
1526         obj = i915_gem_object_lookup(file, args->handle);
1527         if (!obj)
1528                 return -ENOENT;
1529
1530         /* prime objects have no backing filp to GEM mmap
1531          * pages from.
1532          */
1533         if (!obj->base.filp) {
1534                 addr = -ENXIO;
1535                 goto err;
1536         }
1537
1538         if (range_overflows(args->offset, args->size, (u64)obj->base.size)) {
1539                 addr = -EINVAL;
1540                 goto err;
1541         }
1542
1543         addr = vm_mmap(obj->base.filp, 0, args->size,
1544                        PROT_READ | PROT_WRITE, MAP_SHARED,
1545                        args->offset);
1546         if (IS_ERR_VALUE(addr))
1547                 goto err;
1548
1549         if (args->flags & I915_MMAP_WC) {
1550                 struct mm_struct *mm = current->mm;
1551                 struct vm_area_struct *vma;
1552
1553                 if (down_write_killable(&mm->mmap_sem)) {
1554                         addr = -EINTR;
1555                         goto err;
1556                 }
1557                 vma = find_vma(mm, addr);
1558                 if (vma && __vma_matches(vma, obj->base.filp, addr, args->size))
1559                         vma->vm_page_prot =
1560                                 pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
1561                 else
1562                         addr = -ENOMEM;
1563                 up_write(&mm->mmap_sem);
1564                 if (IS_ERR_VALUE(addr))
1565                         goto err;
1566
1567                 /* This may race, but that's ok, it only gets set */
1568                 WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
1569         }
1570         i915_gem_object_put(obj);
1571
1572         args->addr_ptr = (u64)addr;
1573         return 0;
1574
1575 err:
1576         i915_gem_object_put(obj);
1577         return addr;
1578 }
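/*
 * Illustrative sketch (not part of the driver): roughly how userspace reaches
 * the ioctl above. The helper name and error handling are hypothetical; the
 * ioctl number, struct and I915_MMAP_WC flag come from the uapi header
 * <drm/i915_drm.h>.
 *
 *	#include <stdint.h>
 *	#include <sys/ioctl.h>
 *	#include <drm/i915_drm.h>
 *
 *	static void *legacy_cpu_mmap(int drm_fd, uint32_t handle, uint64_t size)
 *	{
 *		struct drm_i915_gem_mmap arg = {
 *			.handle = handle,
 *			.size = size,
 *			.flags = 0,	// or I915_MMAP_WC on PAT-capable CPUs
 *		};
 *
 *		if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_MMAP, &arg))
 *			return NULL;	// errno holds the error code
 *		return (void *)(uintptr_t)arg.addr_ptr;
 *	}
 */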
1579
1580 static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
1581 {
1582         return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
1583 }
1584
1585 /**
1586  * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
1587  *
1588  * A history of the GTT mmap interface:
1589  *
1590  * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
1591  *     be aligned and suitable for fencing, and still fit into the available
1592  *     mappable space left by the pinned display objects. A classic problem
1593  *     was the page-fault-of-doom, where we would ping-pong between
1594  *     two objects that could not both fit inside the GTT, so the memcpy
1595  *     would page one object in at the expense of the other for every
1596  *     single byte copied.
1597  *
1598  * 1 - Objects can be any size, and have any compatible fencing (X, Y or none,
1599  *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
1600  *     object is too large for the available space (or simply too large
1601  *     for the mappable aperture!), a view is created instead and faulted
1602  *     into userspace. (This view is aligned and sized appropriately for
1603  *     fenced access.)
1604  *
1605  * 2 - Recognise WC as a separate cache domain so that we can flush the
1606  *     delayed writes via GTT before performing direct access via WC.
1607  *
1608  * 3 - Remove implicit set-domain(GTT) and synchronisation on initial
1609  *     pagefault; swapin remains transparent.
1610  *
1611  * Restrictions:
1612  *
1613  *  * snoopable objects cannot be accessed via the GTT. It can cause machine
1614  *    hangs on some architectures, corruption on others. An attempt to service
1615  *    a GTT page fault from a snoopable object will generate a SIGBUS.
1616  *
1617  *  * the object must be able to fit into RAM (physical memory, though not
1618  *    limited to the mappable aperture).
1619  *
1620  *
1621  * Caveats:
1622  *
1623  *  * a new GTT page fault will synchronize rendering from the GPU and flush
1624  *    all data to system memory. Subsequent access will not be synchronized.
1625  *
1626  *  * all mappings are revoked on runtime device suspend.
1627  *
1628  *  * there are only 8, 16 or 32 fence registers to share between all users
1629  *    (older machines require a fence register for display and blitter access
1630  *    as well). Contention for the fence registers will cause the previous users
1631  *    to be unmapped and any new access will generate new page faults.
1632  *
1633  *  * running out of memory while servicing a fault may generate a SIGBUS,
1634  *    rather than the expected SIGSEGV.
1635  */
1636 int i915_gem_mmap_gtt_version(void)
1637 {
1638         return 3;
1639 }
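/*
 * Illustrative sketch (not part of the driver): userspace can discover the
 * feature level documented above through DRM_IOCTL_I915_GETPARAM. The helper
 * name is hypothetical; the param and struct come from <drm/i915_drm.h>.
 *
 *	static int query_mmap_gtt_version(int drm_fd)
 *	{
 *		int value = 0;
 *		struct drm_i915_getparam gp = {
 *			.param = I915_PARAM_MMAP_GTT_VERSION,
 *			.value = &value,
 *		};
 *
 *		if (ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp))
 *			return -1;	// query failed
 *		return value;		// expected to match i915_gem_mmap_gtt_version()
 *	}
 */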
1640
1641 static inline struct i915_ggtt_view
1642 compute_partial_view(const struct drm_i915_gem_object *obj,
1643                      pgoff_t page_offset,
1644                      unsigned int chunk)
1645 {
1646         struct i915_ggtt_view view;
1647
1648         if (i915_gem_object_is_tiled(obj))
1649                 chunk = roundup(chunk, tile_row_pages(obj));
1650
1651         view.type = I915_GGTT_VIEW_PARTIAL;
1652         view.partial.offset = rounddown(page_offset, chunk);
1653         view.partial.size =
1654                 min_t(unsigned int, chunk,
1655                       (obj->base.size >> PAGE_SHIFT) - view.partial.offset);
1656
1657         /* If the partial covers the entire object, just create a normal VMA. */
1658         if (chunk >= obj->base.size >> PAGE_SHIFT)
1659                 view.type = I915_GGTT_VIEW_NORMAL;
1660
1661         return view;
1662 }
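/*
 * Worked example for compute_partial_view() above (illustrative numbers,
 * assuming 4KiB pages): for a 16MiB (4096 page) untiled object, a fault at
 * page_offset 2600 with chunk = MIN_CHUNK_PAGES = 256 gives
 *	partial.offset = rounddown(2600, 256) = 2560,
 *	partial.size   = min(256, 4096 - 2560) = 256 pages,
 * i.e. a 1MiB window starting 10MiB into the object. Only when chunk covers
 * the whole object (chunk >= 4096 here) does the view degenerate to a normal
 * VMA.
 */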
1663
1664 /**
1665  * i915_gem_fault - fault a page into the GTT
1666  * @vmf: fault info
1667  *
1668  * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
1669  * from userspace.  The fault handler takes care of binding the object to
1670  * the GTT (if needed), allocating and programming a fence register (again,
1671  * only if needed based on whether the old reg is still valid or the object
1672  * is tiled) and inserting a new PTE into the faulting process's address space.
1673  *
1674  * Note that the faulting process may involve evicting existing objects
1675  * from the GTT and/or fence registers to make room.  So performance may
1676  * suffer if the GTT working set is large or there are few fence registers
1677  * left.
1678  *
1679  * The current feature set supported by i915_gem_fault() and thus GTT mmaps
1680  * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
1681  */
1682 vm_fault_t i915_gem_fault(struct vm_fault *vmf)
1683 {
1684 #define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT)
1685         struct vm_area_struct *area = vmf->vma;
1686         struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
1687         struct drm_device *dev = obj->base.dev;
1688         struct drm_i915_private *dev_priv = to_i915(dev);
1689         struct i915_ggtt *ggtt = &dev_priv->ggtt;
1690         bool write = area->vm_flags & VM_WRITE;
1691         intel_wakeref_t wakeref;
1692         struct i915_vma *vma;
1693         pgoff_t page_offset;
1694         int srcu;
1695         int ret;
1696
1697         /* Sanity check that we allow writing into this object */
1698         if (i915_gem_object_is_readonly(obj) && write)
1699                 return VM_FAULT_SIGBUS;
1700
1701         /* We don't use vmf->pgoff since that has the fake offset */
1702         page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
1703
1704         trace_i915_gem_object_fault(obj, page_offset, true, write);
1705
1706         ret = i915_gem_object_pin_pages(obj);
1707         if (ret)
1708                 goto err;
1709
1710         wakeref = intel_runtime_pm_get(dev_priv);
1711
1712         srcu = i915_reset_trylock(dev_priv);
1713         if (srcu < 0) {
1714                 ret = srcu;
1715                 goto err_rpm;
1716         }
1717
1718         ret = i915_mutex_lock_interruptible(dev);
1719         if (ret)
1720                 goto err_reset;
1721
1722         /* Access to snoopable pages through the GTT is incoherent. */
1723         if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) {
1724                 ret = -EFAULT;
1725                 goto err_unlock;
1726         }
1727
1728         /* Now pin it into the GTT as needed */
1729         vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1730                                        PIN_MAPPABLE |
1731                                        PIN_NONBLOCK |
1732                                        PIN_NONFAULT);
1733         if (IS_ERR(vma)) {
1734                 /* Use a partial view if it is bigger than available space */
1735                 struct i915_ggtt_view view =
1736                         compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
1737                 unsigned int flags;
1738
1739                 flags = PIN_MAPPABLE;
1740                 if (view.type == I915_GGTT_VIEW_NORMAL)
1741                         flags |= PIN_NONBLOCK; /* avoid warnings for pinned */
1742
1743                 /*
1744                  * Userspace is now writing through an untracked VMA, abandon
1745                  * all hope that the hardware is able to track future writes.
1746                  */
1747                 obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
1748
1749                 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
1750                 if (IS_ERR(vma) && !view.type) {
1751                         flags = PIN_MAPPABLE;
1752                         view.type = I915_GGTT_VIEW_PARTIAL;
1753                         vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
1754                 }
1755         }
1756         if (IS_ERR(vma)) {
1757                 ret = PTR_ERR(vma);
1758                 goto err_unlock;
1759         }
1760
1761         ret = i915_vma_pin_fence(vma);
1762         if (ret)
1763                 goto err_unpin;
1764
1765         /* Finally, remap it using the new GTT offset */
1766         ret = remap_io_mapping(area,
1767                                area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
1768                                (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT,
1769                                min_t(u64, vma->size, area->vm_end - area->vm_start),
1770                                &ggtt->iomap);
1771         if (ret)
1772                 goto err_fence;
1773
1774         /* Mark as being mmapped into userspace for later revocation */
1775         assert_rpm_wakelock_held(dev_priv);
1776         if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
1777                 list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
1778         if (CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
1779                 intel_wakeref_auto(&dev_priv->mm.userfault_wakeref,
1780                                    msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));
1781         GEM_BUG_ON(!obj->userfault_count);
1782
1783         i915_vma_set_ggtt_write(vma);
1784
1785 err_fence:
1786         i915_vma_unpin_fence(vma);
1787 err_unpin:
1788         __i915_vma_unpin(vma);
1789 err_unlock:
1790         mutex_unlock(&dev->struct_mutex);
1791 err_reset:
1792         i915_reset_unlock(dev_priv, srcu);
1793 err_rpm:
1794         intel_runtime_pm_put(dev_priv, wakeref);
1795         i915_gem_object_unpin_pages(obj);
1796 err:
1797         switch (ret) {
1798         case -EIO:
1799                 /*
1800                  * We eat errors when the gpu is terminally wedged to avoid
1801                  * userspace unduly crashing (gl has no provisions for mmaps to
1802                  * fail). But any other -EIO isn't ours (e.g. swap in failure)
1803                  * and so needs to be reported.
1804                  */
1805                 if (!i915_terminally_wedged(dev_priv))
1806                         return VM_FAULT_SIGBUS;
1807                 /* else: fall through */
1808         case -EAGAIN:
1809                 /*
1810                  * EAGAIN means the gpu is hung and we'll wait for the error
1811                  * handler to reset everything when re-faulting in
1812                  * i915_mutex_lock_interruptible.
1813                  */
1814         case 0:
1815         case -ERESTARTSYS:
1816         case -EINTR:
1817         case -EBUSY:
1818                 /*
1819                  * EBUSY is ok: this just means that another thread
1820                  * already did the job.
1821                  */
1822                 return VM_FAULT_NOPAGE;
1823         case -ENOMEM:
1824                 return VM_FAULT_OOM;
1825         case -ENOSPC:
1826         case -EFAULT:
1827                 return VM_FAULT_SIGBUS;
1828         default:
1829                 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
1830                 return VM_FAULT_SIGBUS;
1831         }
1832 }
1833
1834 static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
1835 {
1836         struct i915_vma *vma;
1837
1838         GEM_BUG_ON(!obj->userfault_count);
1839
1840         obj->userfault_count = 0;
1841         list_del(&obj->userfault_link);
1842         drm_vma_node_unmap(&obj->base.vma_node,
1843                            obj->base.dev->anon_inode->i_mapping);
1844
1845         for_each_ggtt_vma(vma, obj)
1846                 i915_vma_unset_userfault(vma);
1847 }
1848
1849 /**
1850  * i915_gem_release_mmap - remove physical page mappings
1851  * @obj: obj in question
1852  *
1853  * Preserve the reservation of the mmapping with the DRM core code, but
1854  * relinquish ownership of the pages back to the system.
1855  *
1856  * It is vital that we remove the page mapping if we have mapped a tiled
1857  * object through the GTT and then lose the fence register due to
1858  * resource pressure. Similarly if the object has been moved out of the
1859  * aperture, then pages mapped into userspace must be revoked. Removing the
1860  * mapping will then trigger a page fault on the next user access, allowing
1861  * fixup by i915_gem_fault().
1862  */
1863 void
1864 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
1865 {
1866         struct drm_i915_private *i915 = to_i915(obj->base.dev);
1867         intel_wakeref_t wakeref;
1868
1869         /* Serialisation between user GTT access and our code depends upon
1870          * revoking the CPU's PTEs whilst the mutex is held. The next user
1871          * pagefault then has to wait until we release the mutex.
1872          *
1873          * Note that RPM complicates this somewhat by adding an additional
1874          * requirement that operations to the GGTT be made holding the RPM
1875          * wakeref.
1876          */
1877         lockdep_assert_held(&i915->drm.struct_mutex);
1878         wakeref = intel_runtime_pm_get(i915);
1879
1880         if (!obj->userfault_count)
1881                 goto out;
1882
1883         __i915_gem_object_release_mmap(obj);
1884
1885         /* Ensure that the CPU's PTEs are revoked and there are no outstanding
1886          * memory transactions from userspace before we return. The TLB
1887          * flushing implied by changing the PTEs above *should* be
1888          * sufficient; an extra barrier here just provides us with a bit
1889          * of paranoid documentation about our requirement to serialise
1890          * memory writes before touching registers / GSM.
1891          */
1892         wmb();
1893
1894 out:
1895         intel_runtime_pm_put(i915, wakeref);
1896 }
1897
1898 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
1899 {
1900         struct drm_i915_gem_object *obj, *on;
1901         int i;
1902
1903         /*
1904          * Only called during RPM suspend. All users of the userfault_list
1905          * must be holding an RPM wakeref to ensure that this can not
1906          * run concurrently with themselves (and use the struct_mutex for
1907          * protection between themselves).
1908          */
1909
1910         list_for_each_entry_safe(obj, on,
1911                                  &dev_priv->mm.userfault_list, userfault_link)
1912                 __i915_gem_object_release_mmap(obj);
1913
1914         /* The fences will be lost when the device powers down. If any were
1915          * in use by hardware (i.e. they are pinned), we should not be powering
1916          * down! All other fences will be reacquired by the user upon waking.
1917          */
1918         for (i = 0; i < dev_priv->num_fence_regs; i++) {
1919                 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
1920
1921                 /* Ideally we want to assert that the fence register is not
1922                  * live at this point (i.e. that no piece of code will be
1923                  * trying to write through fence + GTT, as that not only violates
1924                  * our tracking of activity and associated locking/barriers,
1925                  * but is also illegal given that the hw is powered down).
1926                  *
1927                  * Previously we used reg->pin_count as a "liveness" indicator.
1928                  * That is not sufficient, and we need a more fine-grained
1929                  * tool if we want to have a sanity check here.
1930                  */
1931
1932                 if (!reg->vma)
1933                         continue;
1934
1935                 GEM_BUG_ON(i915_vma_has_userfault(reg->vma));
1936                 reg->dirty = true;
1937         }
1938 }
1939
1940 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
1941 {
1942         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
1943         int err;
1944
1945         err = drm_gem_create_mmap_offset(&obj->base);
1946         if (likely(!err))
1947                 return 0;
1948
1949         /* Attempt to reap some mmap space from dead objects */
1950         do {
1951                 err = i915_gem_wait_for_idle(dev_priv,
1952                                              I915_WAIT_INTERRUPTIBLE,
1953                                              MAX_SCHEDULE_TIMEOUT);
1954                 if (err)
1955                         break;
1956
1957                 i915_gem_drain_freed_objects(dev_priv);
1958                 err = drm_gem_create_mmap_offset(&obj->base);
1959                 if (!err)
1960                         break;
1961
1962         } while (flush_delayed_work(&dev_priv->gem.retire_work));
1963
1964         return err;
1965 }
1966
1967 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
1968 {
1969         drm_gem_free_mmap_offset(&obj->base);
1970 }
1971
1972 int
1973 i915_gem_mmap_gtt(struct drm_file *file,
1974                   struct drm_device *dev,
1975                   u32 handle,
1976                   u64 *offset)
1977 {
1978         struct drm_i915_gem_object *obj;
1979         int ret;
1980
1981         obj = i915_gem_object_lookup(file, handle);
1982         if (!obj)
1983                 return -ENOENT;
1984
1985         ret = i915_gem_object_create_mmap_offset(obj);
1986         if (ret == 0)
1987                 *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
1988
1989         i915_gem_object_put(obj);
1990         return ret;
1991 }
1992
1993 /**
1994  * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1995  * @dev: DRM device
1996  * @data: GTT mapping ioctl data
1997  * @file: drm file
1998  *
1999  * Simply returns the fake offset to userspace so it can mmap it.
2000  * The mmap call will end up in drm_gem_mmap(), which will set things
2001  * up so we can get faults in the handler above.
2002  *
2003  * The fault handler will take care of binding the object into the GTT
2004  * (since it may have been evicted to make room for something), allocating
2005  * a fence register, and mapping the appropriate aperture address into
2006  * userspace.
2007  */
2008 int
2009 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
2010                         struct drm_file *file)
2011 {
2012         struct drm_i915_gem_mmap_gtt *args = data;
2013
2014         return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
2015 }
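/*
 * Illustrative sketch (not part of the driver): the two-step GTT mmap flow
 * from userspace. The fake offset returned by the ioctl above is passed to
 * mmap() on the DRM fd itself, and subsequent faults are serviced by
 * i915_gem_fault(). The helper name is hypothetical.
 *
 *	#include <stdint.h>
 *	#include <sys/mman.h>
 *	#include <sys/ioctl.h>
 *	#include <drm/i915_drm.h>
 *
 *	static void *gtt_mmap(int drm_fd, uint32_t handle, size_t size)
 *	{
 *		struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *
 *		if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg))
 *			return MAP_FAILED;
 *		return mmap(NULL, size, PROT_READ | PROT_WRITE,
 *			    MAP_SHARED, drm_fd, (off_t)arg.offset);
 *	}
 */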
2016
2017 /* Immediately discard the backing storage */
2018 void __i915_gem_object_truncate(struct drm_i915_gem_object *obj)
2019 {
2020         i915_gem_object_free_mmap_offset(obj);
2021
2022         if (obj->base.filp == NULL)
2023                 return;
2024
2025         /* Our goal here is to return as much of the memory as
2026          * possible back to the system, since we are called from the OOM path.
2027          * To do this we must instruct the shmfs to drop all of its
2028          * backing pages, *now*.
2029          */
2030         shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
2031         obj->mm.madv = __I915_MADV_PURGED;
2032         obj->mm.pages = ERR_PTR(-EFAULT);
2033 }
2034
2035 static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
2036 {
2037         struct radix_tree_iter iter;
2038         void __rcu **slot;
2039
2040         rcu_read_lock();
2041         radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
2042                 radix_tree_delete(&obj->mm.get_page.radix, iter.index);
2043         rcu_read_unlock();
2044 }
2045
2046 static struct sg_table *
2047 __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
2048 {
2049         struct drm_i915_private *i915 = to_i915(obj->base.dev);
2050         struct sg_table *pages;
2051
2052         pages = fetch_and_zero(&obj->mm.pages);
2053         if (IS_ERR_OR_NULL(pages))
2054                 return pages;
2055
2056         spin_lock(&i915->mm.obj_lock);
2057         list_del(&obj->mm.link);
2058         spin_unlock(&i915->mm.obj_lock);
2059
2060         if (obj->mm.mapping) {
2061                 void *ptr;
2062
2063                 ptr = page_mask_bits(obj->mm.mapping);
2064                 if (is_vmalloc_addr(ptr))
2065                         vunmap(ptr);
2066                 else
2067                         kunmap(kmap_to_page(ptr));
2068
2069                 obj->mm.mapping = NULL;
2070         }
2071
2072         __i915_gem_object_reset_page_iter(obj);
2073         obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
2074
2075         return pages;
2076 }
2077
2078 int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
2079                                 enum i915_mm_subclass subclass)
2080 {
2081         struct sg_table *pages;
2082         int ret;
2083
2084         if (i915_gem_object_has_pinned_pages(obj))
2085                 return -EBUSY;
2086
2087         GEM_BUG_ON(obj->bind_count);
2088
2089         /* May be called by the shrinker from within get_pages() (on another bo) */
2090         mutex_lock_nested(&obj->mm.lock, subclass);
2091         if (unlikely(atomic_read(&obj->mm.pages_pin_count))) {
2092                 ret = -EBUSY;
2093                 goto unlock;
2094         }
2095
2096         /*
2097          * ->put_pages might need to allocate memory for the bit17 swizzle
2098          * array, hence protect them from being reaped by removing them from gtt
2099          * lists early.
2100          */
2101         pages = __i915_gem_object_unset_pages(obj);
2102
2103         /*
2104          * XXX Temporary hijinx to avoid updating all backends to handle
2105          * NULL pages. In the future, when we have more asynchronous
2106          * get_pages backends we should be better able to handle the
2107          * cancellation of the async task in a more uniform manner.
2108          */
2109         if (!pages && !i915_gem_object_needs_async_cancel(obj))
2110                 pages = ERR_PTR(-EINVAL);
2111
2112         if (!IS_ERR(pages))
2113                 obj->ops->put_pages(obj, pages);
2114
2115         ret = 0;
2116 unlock:
2117         mutex_unlock(&obj->mm.lock);
2118
2119         return ret;
2120 }
2121
2122 bool i915_sg_trim(struct sg_table *orig_st)
2123 {
2124         struct sg_table new_st;
2125         struct scatterlist *sg, *new_sg;
2126         unsigned int i;
2127
2128         if (orig_st->nents == orig_st->orig_nents)
2129                 return false;
2130
2131         if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN))
2132                 return false;
2133
2134         new_sg = new_st.sgl;
2135         for_each_sg(orig_st->sgl, sg, orig_st->nents, i) {
2136                 sg_set_page(new_sg, sg_page(sg), sg->length, 0);
2137                 sg_dma_address(new_sg) = sg_dma_address(sg);
2138                 sg_dma_len(new_sg) = sg_dma_len(sg);
2139
2140                 new_sg = sg_next(new_sg);
2141         }
2142         GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */
2143
2144         sg_free_table(orig_st);
2145
2146         *orig_st = new_st;
2147         return true;
2148 }
2149
2150 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
2151                                  struct sg_table *pages,
2152                                  unsigned int sg_page_sizes)
2153 {
2154         struct drm_i915_private *i915 = to_i915(obj->base.dev);
2155         unsigned long supported = INTEL_INFO(i915)->page_sizes;
2156         int i;
2157
2158         lockdep_assert_held(&obj->mm.lock);
2159
2160         /* Make the pages coherent with the GPU (flushing any swapin). */
2161         if (obj->cache_dirty) {
2162                 obj->write_domain = 0;
2163                 if (i915_gem_object_has_struct_page(obj))
2164                         drm_clflush_sg(pages);
2165                 obj->cache_dirty = false;
2166         }
2167
2168         obj->mm.get_page.sg_pos = pages->sgl;
2169         obj->mm.get_page.sg_idx = 0;
2170
2171         obj->mm.pages = pages;
2172
2173         if (i915_gem_object_is_tiled(obj) &&
2174             i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
2175                 GEM_BUG_ON(obj->mm.quirked);
2176                 __i915_gem_object_pin_pages(obj);
2177                 obj->mm.quirked = true;
2178         }
2179
2180         GEM_BUG_ON(!sg_page_sizes);
2181         obj->mm.page_sizes.phys = sg_page_sizes;
2182
2183         /*
2184          * Calculate the supported page-sizes which fit into the given
2185          * sg_page_sizes. This will give us the page-sizes which we may be able
2186          * to use opportunistically when later inserting into the GTT. For
2187          * example if phys=2G, then in theory we should be able to use 1G, 2M,
2188          * 64K or 4K pages, although in practice this will depend on a number of
2189          * other factors.
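         *
         * Worked example (illustrative): with supported = 4K | 64K | 2M and
         * phys = 2M | 64K, the loop below keeps each supported page size for
         * which phys still has a bit set at or above it, giving
         * sg = 2M | 64K | 4K.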
2190          */
2191         obj->mm.page_sizes.sg = 0;
2192         for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
2193                 if (obj->mm.page_sizes.phys & ~0u << i)
2194                         obj->mm.page_sizes.sg |= BIT(i);
2195         }
2196         GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg));
2197
2198         spin_lock(&i915->mm.obj_lock);
2199         list_add(&obj->mm.link, &i915->mm.unbound_list);
2200         spin_unlock(&i915->mm.obj_lock);
2201 }
2202
2203 static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2204 {
2205         int err;
2206
2207         if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) {
2208                 DRM_DEBUG("Attempting to obtain a purgeable object\n");
2209                 return -EFAULT;
2210         }
2211
2212         err = obj->ops->get_pages(obj);
2213         GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj));
2214
2215         return err;
2216 }
2217
2218 /* Ensure that the associated pages are gathered from the backing storage
2219  * and pinned into our object. i915_gem_object_pin_pages() may be called
2220  * multiple times before they are released by a single call to
2221  * i915_gem_object_unpin_pages() - once the pages are no longer referenced
2222  * either as a result of memory pressure (reaping pages under the shrinker)
2223  * or as the object is itself released.
2224  */
2225 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2226 {
2227         int err;
2228
2229         err = mutex_lock_interruptible(&obj->mm.lock);
2230         if (err)
2231                 return err;
2232
2233         if (unlikely(!i915_gem_object_has_pages(obj))) {
2234                 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
2235
2236                 err = ____i915_gem_object_get_pages(obj);
2237                 if (err)
2238                         goto unlock;
2239
2240                 smp_mb__before_atomic();
2241         }
2242         atomic_inc(&obj->mm.pages_pin_count);
2243
2244 unlock:
2245         mutex_unlock(&obj->mm.lock);
2246         return err;
2247 }
2248
2249 /* The 'mapping' part of i915_gem_object_pin_map() below */
2250 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
2251                                  enum i915_map_type type)
2252 {
2253         unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
2254         struct sg_table *sgt = obj->mm.pages;
2255         struct sgt_iter sgt_iter;
2256         struct page *page;
2257         struct page *stack_pages[32];
2258         struct page **pages = stack_pages;
2259         unsigned long i = 0;
2260         pgprot_t pgprot;
2261         void *addr;
2262
2263         /* A single page can always be kmapped */
2264         if (n_pages == 1 && type == I915_MAP_WB)
2265                 return kmap(sg_page(sgt->sgl));
2266
2267         if (n_pages > ARRAY_SIZE(stack_pages)) {
2268                 /* Too big for stack -- allocate temporary array instead */
2269                 pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
2270                 if (!pages)
2271                         return NULL;
2272         }
2273
2274         for_each_sgt_page(page, sgt_iter, sgt)
2275                 pages[i++] = page;
2276
2277         /* Check that we have the expected number of pages */
2278         GEM_BUG_ON(i != n_pages);
2279
2280         switch (type) {
2281         default:
2282                 MISSING_CASE(type);
2283                 /* fallthrough to use PAGE_KERNEL anyway */
2284         case I915_MAP_WB:
2285                 pgprot = PAGE_KERNEL;
2286                 break;
2287         case I915_MAP_WC:
2288                 pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
2289                 break;
2290         }
2291         addr = vmap(pages, n_pages, 0, pgprot);
2292
2293         if (pages != stack_pages)
2294                 kvfree(pages);
2295
2296         return addr;
2297 }
2298
2299 /* get, pin, and map the pages of the object into kernel space */
2300 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
2301                               enum i915_map_type type)
2302 {
2303         enum i915_map_type has_type;
2304         bool pinned;
2305         void *ptr;
2306         int ret;
2307
2308         if (unlikely(!i915_gem_object_has_struct_page(obj)))
2309                 return ERR_PTR(-ENXIO);
2310
2311         ret = mutex_lock_interruptible(&obj->mm.lock);
2312         if (ret)
2313                 return ERR_PTR(ret);
2314
2315         pinned = !(type & I915_MAP_OVERRIDE);
2316         type &= ~I915_MAP_OVERRIDE;
2317
2318         if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
2319                 if (unlikely(!i915_gem_object_has_pages(obj))) {
2320                         GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
2321
2322                         ret = ____i915_gem_object_get_pages(obj);
2323                         if (ret)
2324                                 goto err_unlock;
2325
2326                         smp_mb__before_atomic();
2327                 }
2328                 atomic_inc(&obj->mm.pages_pin_count);
2329                 pinned = false;
2330         }
2331         GEM_BUG_ON(!i915_gem_object_has_pages(obj));
2332
2333         ptr = page_unpack_bits(obj->mm.mapping, &has_type);
2334         if (ptr && has_type != type) {
2335                 if (pinned) {
2336                         ret = -EBUSY;
2337                         goto err_unpin;
2338                 }
2339
2340                 if (is_vmalloc_addr(ptr))
2341                         vunmap(ptr);
2342                 else
2343                         kunmap(kmap_to_page(ptr));
2344
2345                 ptr = obj->mm.mapping = NULL;
2346         }
2347
2348         if (!ptr) {
2349                 ptr = i915_gem_object_map(obj, type);
2350                 if (!ptr) {
2351                         ret = -ENOMEM;
2352                         goto err_unpin;
2353                 }
2354
2355                 obj->mm.mapping = page_pack_bits(ptr, type);
2356         }
2357
2358 out_unlock:
2359         mutex_unlock(&obj->mm.lock);
2360         return ptr;
2361
2362 err_unpin:
2363         atomic_dec(&obj->mm.pages_pin_count);
2364 err_unlock:
2365         ptr = ERR_PTR(ret);
2366         goto out_unlock;
2367 }
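/*
 * Typical in-kernel usage of the helpers above (illustrative sketch only;
 * 'data' and 'len' are placeholders, error handling trimmed):
 *
 *	void *vaddr;
 *
 *	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
 *	if (IS_ERR(vaddr))
 *		return PTR_ERR(vaddr);
 *
 *	memcpy(vaddr, data, len);
 *	__i915_gem_object_flush_map(obj, 0, len);	// see below
 *	i915_gem_object_unpin_map(obj);
 */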
2368
2369 void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj,
2370                                  unsigned long offset,
2371                                  unsigned long size)
2372 {
2373         enum i915_map_type has_type;
2374         void *ptr;
2375
2376         GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
2377         GEM_BUG_ON(range_overflows_t(typeof(obj->base.size),
2378                                      offset, size, obj->base.size));
2379
2380         obj->mm.dirty = true;
2381
2382         if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)
2383                 return;
2384
2385         ptr = page_unpack_bits(obj->mm.mapping, &has_type);
2386         if (has_type == I915_MAP_WC)
2387                 return;
2388
2389         drm_clflush_virt_range(ptr + offset, size);
2390         if (size == obj->base.size) {
2391                 obj->write_domain &= ~I915_GEM_DOMAIN_CPU;
2392                 obj->cache_dirty = false;
2393         }
2394 }
2395
2396 static unsigned long to_wait_timeout(s64 timeout_ns)
2397 {
2398         if (timeout_ns < 0)
2399                 return MAX_SCHEDULE_TIMEOUT;
2400
2401         if (timeout_ns == 0)
2402                 return 0;
2403
2404         return nsecs_to_jiffies_timeout(timeout_ns);
2405 }
2406
2407 /**
2408  * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
2409  * @dev: drm device pointer
2410  * @data: ioctl data blob
2411  * @file: drm file pointer
2412  *
2413  * Returns 0 if successful, else an error is returned with the remaining time in
2414  * the timeout parameter.
2415  *  -ETIME: object is still busy after timeout
2416  *  -ERESTARTSYS: signal interrupted the wait
2417  *  -ENOENT: object doesn't exist
2418  * Also possible, but rare:
2419  *  -EAGAIN: incomplete, restart syscall
2420  *  -ENOMEM: out of memory
2421  *  -ENODEV: Internal IRQ fail
2422  *  -E?: The add request failed
2423  *
2424  * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
2425  * non-zero timeout parameter the wait ioctl will wait for the given number of
2426  * nanoseconds on an object becoming unbusy. Since the wait itself does so
2427  * without holding struct_mutex, the object may become re-busied before this
2428  * function completes. A similar but shorter race condition exists in the busy
2429  * ioctl.
2430  */
2431 int
2432 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2433 {
2434         struct drm_i915_gem_wait *args = data;
2435         struct drm_i915_gem_object *obj;
2436         ktime_t start;
2437         long ret;
2438
2439         if (args->flags != 0)
2440                 return -EINVAL;
2441
2442         obj = i915_gem_object_lookup(file, args->bo_handle);
2443         if (!obj)
2444                 return -ENOENT;
2445
2446         start = ktime_get();
2447
2448         ret = i915_gem_object_wait(obj,
2449                                    I915_WAIT_INTERRUPTIBLE |
2450                                    I915_WAIT_PRIORITY |
2451                                    I915_WAIT_ALL,
2452                                    to_wait_timeout(args->timeout_ns));
2453
2454         if (args->timeout_ns > 0) {
2455                 args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
2456                 if (args->timeout_ns < 0)
2457                         args->timeout_ns = 0;
2458
2459                 /*
2460                  * Apparently ktime isn't accurate enough and occasionally has a
2461                  * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
2462                  * things up to make the test happy. We allow up to 1 jiffy.
2463                  *
2464                  * This is a regression from the timespec->ktime conversion.
2465                  */
2466                 if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns))
2467                         args->timeout_ns = 0;
2468
2469                 /* Asked to wait beyond the jiffy/scheduler precision? */
2470                 if (ret == -ETIME && args->timeout_ns)
2471                         ret = -EAGAIN;
2472         }
2473
2474         i915_gem_object_put(obj);
2475         return ret;
2476 }
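/*
 * Illustrative sketch (not part of the driver): using the wait ioctl above
 * from userspace. A timeout of 0 acts as a busy check, a negative timeout
 * waits forever, and a positive timeout is updated with the time remaining.
 * The helper name is hypothetical; includes as in the earlier sketches.
 *
 *	static int wait_bo(int drm_fd, uint32_t handle, int64_t timeout_ns)
 *	{
 *		struct drm_i915_gem_wait wait = {
 *			.bo_handle = handle,
 *			.timeout_ns = timeout_ns,
 *		};
 *
 *		return ioctl(drm_fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
 *	}
 */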
2477
2478 static int wait_for_engines(struct drm_i915_private *i915)
2479 {
2480         if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) {
2481                 dev_err(i915->drm.dev,
2482                         "Failed to idle engines, declaring wedged!\n");
2483                 GEM_TRACE_DUMP();
2484                 i915_gem_set_wedged(i915);
2485                 return -EIO;
2486         }
2487
2488         return 0;
2489 }
2490
2491 static long
2492 wait_for_timelines(struct drm_i915_private *i915,
2493                    unsigned int flags, long timeout)
2494 {
2495         struct i915_gt_timelines *gt = &i915->gt.timelines;
2496         struct i915_timeline *tl;
2497
2498         mutex_lock(&gt->mutex);
2499         list_for_each_entry(tl, &gt->active_list, link) {
2500                 struct i915_request *rq;
2501
2502                 rq = i915_active_request_get_unlocked(&tl->last_request);
2503                 if (!rq)
2504                         continue;
2505
2506                 mutex_unlock(&gt->mutex);
2507
2508                 /*
2509                  * "Race-to-idle".
2510                  *
2511                  * Switching to the kernel context is often used as a synchronous
2512                  * step prior to idling, e.g. in suspend for flushing all
2513                  * current operations to memory before sleeping. These we
2514                  * want to complete as quickly as possible to avoid prolonged
2515                  * stalls, so allow the gpu to boost to maximum clocks.
2516                  */
2517                 if (flags & I915_WAIT_FOR_IDLE_BOOST)
2518                         gen6_rps_boost(rq);
2519
2520                 timeout = i915_request_wait(rq, flags, timeout);
2521                 i915_request_put(rq);
2522                 if (timeout < 0)
2523                         return timeout;
2524
2525                 /* restart after reacquiring the lock */
2526                 mutex_lock(&gt->mutex);
2527                 tl = list_entry(&gt->active_list, typeof(*tl), link);
2528         }
2529         mutex_unlock(&gt->mutex);
2530
2531         return timeout;
2532 }
2533
2534 int i915_gem_wait_for_idle(struct drm_i915_private *i915,
2535                            unsigned int flags, long timeout)
2536 {
2537         GEM_TRACE("flags=%x (%s), timeout=%ld%s, awake?=%s\n",
2538                   flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked",
2539                   timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "",
2540                   yesno(i915->gt.awake));
2541
2542         /* If the device is asleep, we have no requests outstanding */
2543         if (!READ_ONCE(i915->gt.awake))
2544                 return 0;
2545
2546         timeout = wait_for_timelines(i915, flags, timeout);
2547         if (timeout < 0)
2548                 return timeout;
2549
2550         if (flags & I915_WAIT_LOCKED) {
2551                 int err;
2552
2553                 lockdep_assert_held(&i915->drm.struct_mutex);
2554
2555                 err = wait_for_engines(i915);
2556                 if (err)
2557                         return err;
2558
2559                 i915_retire_requests(i915);
2560         }
2561
2562         return 0;
2563 }
2564
2565 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
2566 {
2567         /*
2568          * We manually flush the CPU domain so that we can override and
2569          * force the flush for the display, and perform it asynchronously.
2570          */
2571         flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
2572         if (obj->cache_dirty)
2573                 i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
2574         obj->write_domain = 0;
2575 }
2576
2577 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
2578 {
2579         if (!READ_ONCE(obj->pin_global))
2580                 return;
2581
2582         mutex_lock(&obj->base.dev->struct_mutex);
2583         __i915_gem_object_flush_for_display(obj);
2584         mutex_unlock(&obj->base.dev->struct_mutex);
2585 }
2586
2587 /**
2588  * i915_gem_object_set_to_wc_domain - move a single object to the WC read, and possibly write, domain.
2589  * @obj: object to act on
2590  * @write: ask for write access or read only
2591  *
2592  * This function returns when the move is complete, including waiting on
2593  * flushes to occur.
2594  */
2595 int
2596 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
2597 {
2598         int ret;
2599
2600         lockdep_assert_held(&obj->base.dev->struct_mutex);
2601
2602         ret = i915_gem_object_wait(obj,
2603                                    I915_WAIT_INTERRUPTIBLE |
2604                                    I915_WAIT_LOCKED |
2605                                    (write ? I915_WAIT_ALL : 0),
2606                                    MAX_SCHEDULE_TIMEOUT);
2607         if (ret)
2608                 return ret;
2609
2610         if (obj->write_domain == I915_GEM_DOMAIN_WC)
2611                 return 0;
2612
2613         /* Flush and acquire obj->pages so that we are coherent through
2614          * direct access in memory with previous cached writes through
2615          * shmemfs and that our cache domain tracking remains valid.
2616          * For example, if the obj->filp was moved to swap without us
2617          * being notified and releasing the pages, we would mistakenly
2618          * continue to assume that the obj remained out of the CPU cached
2619          * domain.
2620          */
2621         ret = i915_gem_object_pin_pages(obj);
2622         if (ret)
2623                 return ret;
2624
2625         flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
2626
2627         /* Serialise direct access to this object with the barriers for
2628          * coherent writes from the GPU, by effectively invalidating the
2629          * WC domain upon first access.
2630          */
2631         if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
2632                 mb();
2633
2634         /* It should now be out of any other write domains, and we can update
2635          * the domain values for our changes.
2636          */
2637         GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
2638         obj->read_domains |= I915_GEM_DOMAIN_WC;
2639         if (write) {
2640                 obj->read_domains = I915_GEM_DOMAIN_WC;
2641                 obj->write_domain = I915_GEM_DOMAIN_WC;
2642                 obj->mm.dirty = true;
2643         }
2644
2645         i915_gem_object_unpin_pages(obj);
2646         return 0;
2647 }
2648
2649 /**
2650  * i915_gem_object_set_to_gtt_domain - move a single object to the GTT read, and possibly write, domain.
2651  * @obj: object to act on
2652  * @write: ask for write access or read only
2653  *
2654  * This function returns when the move is complete, including waiting on
2655  * flushes to occur.
2656  */
2657 int
2658 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
2659 {
2660         int ret;
2661
2662         lockdep_assert_held(&obj->base.dev->struct_mutex);
2663
2664         ret = i915_gem_object_wait(obj,
2665                                    I915_WAIT_INTERRUPTIBLE |
2666                                    I915_WAIT_LOCKED |
2667                                    (write ? I915_WAIT_ALL : 0),
2668                                    MAX_SCHEDULE_TIMEOUT);
2669         if (ret)
2670                 return ret;
2671
2672         if (obj->write_domain == I915_GEM_DOMAIN_GTT)
2673                 return 0;
2674
2675         /* Flush and acquire obj->pages so that we are coherent through
2676          * direct access in memory with previous cached writes through
2677          * shmemfs and that our cache domain tracking remains valid.
2678          * For example, if the obj->filp was moved to swap without us
2679          * being notified and releasing the pages, we would mistakenly
2680          * continue to assume that the obj remained out of the CPU cached
2681          * domain.
2682          */
2683         ret = i915_gem_object_pin_pages(obj);
2684         if (ret)
2685                 return ret;
2686
2687         flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
2688
2689         /* Serialise direct access to this object with the barriers for
2690          * coherent writes from the GPU, by effectively invalidating the
2691          * GTT domain upon first access.
2692          */
2693         if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
2694                 mb();
2695
2696         /* It should now be out of any other write domains, and we can update
2697          * the domain values for our changes.
2698          */
2699         GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
2700         obj->read_domains |= I915_GEM_DOMAIN_GTT;
2701         if (write) {
2702                 obj->read_domains = I915_GEM_DOMAIN_GTT;
2703                 obj->write_domain = I915_GEM_DOMAIN_GTT;
2704                 obj->mm.dirty = true;
2705         }
2706
2707         i915_gem_object_unpin_pages(obj);
2708         return 0;
2709 }
2710
2711 /**
2712  * i915_gem_object_set_cache_level - change the cache-level of an object across all VMA.
2713  * @obj: object to act on
2714  * @cache_level: new cache level to set for the object
2715  *
2716  * After this function returns, the object will be in the new cache-level
2717  * across all GTT and the contents of the backing storage will be coherent,
2718  * with respect to the new cache-level. In order to keep the backing storage
2719  * coherent for all users, we only allow a single cache level to be set
2720  * globally on the object and prevent it from being changed whilst the
2721  * hardware is reading from the object. That is if the object is currently
2722  * on the scanout it will be set to uncached (or equivalent display
2723  * cache coherency) and all non-MOCS GPU access will also be uncached so
2724  * that all direct access to the scanout remains coherent.
2725  */
2726 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
2727                                     enum i915_cache_level cache_level)
2728 {
2729         struct i915_vma *vma;
2730         int ret;
2731
2732         lockdep_assert_held(&obj->base.dev->struct_mutex);
2733
2734         if (obj->cache_level == cache_level)
2735                 return 0;
2736
2737         /* Inspect the list of currently bound VMA and unbind any that would
2738          * be invalid given the new cache-level. This is principally to
2739          * catch the issue of the CS prefetch crossing page boundaries and
2740          * reading an invalid PTE on older architectures.
2741          */
2742 restart:
2743         list_for_each_entry(vma, &obj->vma.list, obj_link) {
2744                 if (!drm_mm_node_allocated(&vma->node))
2745                         continue;
2746
2747                 if (i915_vma_is_pinned(vma)) {
2748                         DRM_DEBUG("can not change the cache level of pinned objects\n");
2749                         return -EBUSY;
2750                 }
2751
2752                 if (!i915_vma_is_closed(vma) &&
2753                     i915_gem_valid_gtt_space(vma, cache_level))
2754                         continue;
2755
2756                 ret = i915_vma_unbind(vma);
2757                 if (ret)
2758                         return ret;
2759
2760                 /* As unbinding may affect other elements in the
2761                  * obj->vma.list (due to side-effects from retiring
2762                  * an active vma), play safe and restart the iterator.
2763                  */
2764                 goto restart;
2765         }
2766
2767         /* We can reuse the existing drm_mm nodes but need to change the
2768          * cache-level on the PTE. We could simply unbind them all and
2769          * rebind with the correct cache-level on next use. However since
2770          * we already have a valid slot, dma mapping, pages etc, we may as
2771          * well rewrite the PTE in the belief that doing so tramples upon less
2772          * state and so involves less work.
2773          */
2774         if (obj->bind_count) {
2775                 /* Before we change the PTE, the GPU must not be accessing it.
2776                  * If we wait upon the object, we know that all the bound
2777                  * VMA are no longer active.
2778                  */
2779                 ret = i915_gem_object_wait(obj,
2780                                            I915_WAIT_INTERRUPTIBLE |
2781                                            I915_WAIT_LOCKED |
2782                                            I915_WAIT_ALL,
2783                                            MAX_SCHEDULE_TIMEOUT);
2784                 if (ret)
2785                         return ret;
2786
2787                 if (!HAS_LLC(to_i915(obj->base.dev)) &&
2788                     cache_level != I915_CACHE_NONE) {
2789                         /* Access to snoopable pages through the GTT is
2790                          * incoherent and on some machines causes a hard
2791                          * lockup. Relinquish the CPU mmapping to force
2792                          * userspace to refault in the pages and we can
2793                          * then double check if the GTT mapping is still
2794                          * valid for that pointer access.
2795                          */
2796                         i915_gem_release_mmap(obj);
2797
2798                         /* As we no longer need a fence for GTT access,
2799                          * we can relinquish it now (and so prevent having
2800                          * to steal a fence from someone else on the next
2801                          * fence request). Note GPU activity would have
2802                          * dropped the fence as all snoopable access is
2803                          * supposed to be linear.
2804                          */
2805                         for_each_ggtt_vma(vma, obj) {
2806                                 ret = i915_vma_put_fence(vma);
2807                                 if (ret)
2808                                         return ret;
2809                         }
2810                 } else {
2811                         /* We either have incoherent backing store and
2812                          * so no GTT access or the architecture is fully
2813                          * coherent. In such cases, existing GTT mmaps
2814                          * ignore the cache bit in the PTE and we can
2815                          * rewrite it without confusing the GPU or having
2816                          * to force userspace to fault back in its mmaps.
2817                          */
2818                 }
2819
2820                 list_for_each_entry(vma, &obj->vma.list, obj_link) {
2821                         if (!drm_mm_node_allocated(&vma->node))
2822                                 continue;
2823
2824                         ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
2825                         if (ret)
2826                                 return ret;
2827                 }
2828         }
2829
2830         list_for_each_entry(vma, &obj->vma.list, obj_link)
2831                 vma->node.color = cache_level;
2832         i915_gem_object_set_cache_coherency(obj, cache_level);
2833         obj->cache_dirty = true; /* Always invalidate stale cachelines */
2834
2835         return 0;
2836 }
2837
2838 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
2839                                struct drm_file *file)
2840 {
2841         struct drm_i915_gem_caching *args = data;
2842         struct drm_i915_gem_object *obj;
2843         int err = 0;
2844
2845         rcu_read_lock();
2846         obj = i915_gem_object_lookup_rcu(file, args->handle);
2847         if (!obj) {
2848                 err = -ENOENT;
2849                 goto out;
2850         }
2851
2852         switch (obj->cache_level) {
2853         case I915_CACHE_LLC:
2854         case I915_CACHE_L3_LLC:
2855                 args->caching = I915_CACHING_CACHED;
2856                 break;
2857
2858         case I915_CACHE_WT:
2859                 args->caching = I915_CACHING_DISPLAY;
2860                 break;
2861
2862         default:
2863                 args->caching = I915_CACHING_NONE;
2864                 break;
2865         }
2866 out:
2867         rcu_read_unlock();
2868         return err;
2869 }
2870
2871 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
2872                                struct drm_file *file)
2873 {
2874         struct drm_i915_private *i915 = to_i915(dev);
2875         struct drm_i915_gem_caching *args = data;
2876         struct drm_i915_gem_object *obj;
2877         enum i915_cache_level level;
2878         int ret = 0;
2879
2880         switch (args->caching) {
2881         case I915_CACHING_NONE:
2882                 level = I915_CACHE_NONE;
2883                 break;
2884         case I915_CACHING_CACHED:
2885                 /*
2886                  * Due to a HW issue on BXT A stepping, GPU stores via a
2887                  * snooped mapping may leave stale data in a corresponding CPU
2888                  * cacheline, whereas normally such cachelines would get
2889                  * invalidated.
2890                  */
2891                 if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
2892                         return -ENODEV;
2893
2894                 level = I915_CACHE_LLC;
2895                 break;
2896         case I915_CACHING_DISPLAY:
2897                 level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
2898                 break;
2899         default:
2900                 return -EINVAL;
2901         }
2902
2903         obj = i915_gem_object_lookup(file, args->handle);
2904         if (!obj)
2905                 return -ENOENT;
2906
2907         /*
2908          * The caching mode of a proxy object is handled by its generator, and
2909          * is not allowed to be changed by userspace.
2910          */
2911         if (i915_gem_object_is_proxy(obj)) {
2912                 ret = -ENXIO;
2913                 goto out;
2914         }
2915
2916         if (obj->cache_level == level)
2917                 goto out;
2918
2919         ret = i915_gem_object_wait(obj,
2920                                    I915_WAIT_INTERRUPTIBLE,
2921                                    MAX_SCHEDULE_TIMEOUT);
2922         if (ret)
2923                 goto out;
2924
2925         ret = i915_mutex_lock_interruptible(dev);
2926         if (ret)
2927                 goto out;
2928
2929         ret = i915_gem_object_set_cache_level(obj, level);
2930         mutex_unlock(&dev->struct_mutex);
2931
2932 out:
2933         i915_gem_object_put(obj);
2934         return ret;
2935 }
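/*
 * Illustrative sketch (not part of the driver): requesting a caching mode
 * change from userspace with the ioctl above. The helper name is
 * hypothetical; the struct and I915_CACHING_* values come from
 * <drm/i915_drm.h>.
 *
 *	static int set_caching(int drm_fd, uint32_t handle, uint32_t mode)
 *	{
 *		struct drm_i915_gem_caching arg = {
 *			.handle = handle,
 *			.caching = mode,	// e.g. I915_CACHING_NONE
 *		};
 *
 *		return ioctl(drm_fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
 *	}
 */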
2936
2937 /*
2938  * Prepare a buffer for the display plane (scanout, cursors, etc). Can be called
2939  * from an uninterruptible phase (modesetting) and allows any flushes to be
2940  * pipelined (for pageflips). We only flush the caches while preparing the
2941  * buffer for display; the callers are responsible for the frontbuffer flush.
2942  */
2943 struct i915_vma *
2944 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
2945                                      u32 alignment,
2946                                      const struct i915_ggtt_view *view,
2947                                      unsigned int flags)
2948 {
2949         struct i915_vma *vma;
2950         int ret;
2951
2952         lockdep_assert_held(&obj->base.dev->struct_mutex);
2953
2954         /* Mark the global pin early so that we account for the
2955          * display coherency whilst setting up the cache domains.
2956          */
2957         obj->pin_global++;
2958
2959         /* The display engine is not coherent with the LLC cache on gen6.  As
2960          * a result, we make sure that the pinning that is about to occur is
2961          * done with uncached PTEs. This is lowest common denominator for all
2962          * chipsets.
2963          *
2964          * However for gen6+, we could do better by using the GFDT bit instead
2965          * of uncaching, which would allow us to flush all the LLC-cached data
2966          * with that bit in the PTE to main memory with just one PIPE_CONTROL.
2967          */
2968         ret = i915_gem_object_set_cache_level(obj,
2969                                               HAS_WT(to_i915(obj->base.dev)) ?
2970                                               I915_CACHE_WT : I915_CACHE_NONE);
2971         if (ret) {
2972                 vma = ERR_PTR(ret);
2973                 goto err_unpin_global;
2974         }
2975
2976         /* As the user may map the buffer once pinned in the display plane
2977          * (e.g. libkms for the bootup splash), we have to ensure that we
2978          * always use map_and_fenceable for all scanout buffers. However,
2979          * it may simply be too big to fit into mappable, in which case
2980          * put it anyway and hope that userspace can cope (but always first
2981          * try to preserve the existing ABI).
2982          */
2983         vma = ERR_PTR(-ENOSPC);
2984         if ((flags & PIN_MAPPABLE) == 0 &&
2985             (!view || view->type == I915_GGTT_VIEW_NORMAL))
2986                 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
2987                                                flags |
2988                                                PIN_MAPPABLE |
2989                                                PIN_NONBLOCK);
2990         if (IS_ERR(vma))
2991                 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
2992         if (IS_ERR(vma))
2993                 goto err_unpin_global;
2994
2995         vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
2996
2997         __i915_gem_object_flush_for_display(obj);
2998
2999         /* It should now be out of any other write domains, and we can update
3000          * the domain values for our changes.
3001          */
3002         obj->read_domains |= I915_GEM_DOMAIN_GTT;
3003
3004         return vma;
3005
3006 err_unpin_global:
3007         obj->pin_global--;
3008         return vma;
3009 }
3010
3011 void
3012 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
3013 {
3014         lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
3015
3016         if (WARN_ON(vma->obj->pin_global == 0))
3017                 return;
3018
3019         if (--vma->obj->pin_global == 0)
3020                 vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
3021
3022         /* Bump the LRU to try and avoid premature eviction whilst flipping  */
3023         i915_gem_object_bump_inactive_ggtt(vma->obj);
3024
3025         i915_vma_unpin(vma);
3026 }
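
/*
 * Illustrative sketch (not part of the driver): the expected pairing of the
 * two helpers above around a scanout update, under struct_mutex. The
 * function name and the bare pin/unpin sequence are hypothetical and only
 * show the intended calling convention.
 */
static int example_pin_for_scanout(struct drm_i915_gem_object *obj,
                                   const struct i915_ggtt_view *view)
{
        struct i915_vma *vma;

        lockdep_assert_held(&obj->base.dev->struct_mutex);

        /* No extra pin flags for this sketch; alignment 0 means "default". */
        vma = i915_gem_object_pin_to_display_plane(obj, 0, view, 0);
        if (IS_ERR(vma))
                return PTR_ERR(vma);

        /* ... program the plane using i915_ggtt_offset(vma) ... */

        i915_gem_object_unpin_from_display_plane(vma);
        return 0;
}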
3027
3028 /**
3029  * i915_gem_object_set_to_cpu_domain - Moves a single object to the CPU read, and possibly write, domain.
3030  * @obj: object to act on
3031  * @write: requesting write or read-only access
3032  *
3033  * This function returns when the move is complete, including waiting on
3034  * flushes to occur.
3035  */
3036 int
3037 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
3038 {
3039         int ret;
3040
3041         lockdep_assert_held(&obj->base.dev->struct_mutex);
3042
3043         ret = i915_gem_object_wait(obj,
3044                                    I915_WAIT_INTERRUPTIBLE |
3045                                    I915_WAIT_LOCKED |
3046                                    (write ? I915_WAIT_ALL : 0),
3047                                    MAX_SCHEDULE_TIMEOUT);
3048         if (ret)
3049                 return ret;
3050
3051         flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
3052
3053         /* Flush the CPU cache if it's still invalid. */
3054         if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
3055                 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
3056                 obj->read_domains |= I915_GEM_DOMAIN_CPU;
3057         }
3058
3059         /* It should now be out of any other write domains, and we can update
3060          * the domain values for our changes.
3061          */
3062         GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);
3063
3064         /* If we're writing through the CPU, then the GPU read domains will
3065          * need to be invalidated at next use.
3066          */
3067         if (write)
3068                 __start_cpu_write(obj);
3069
3070         return 0;
3071 }
3072
3073 /* Throttle our rendering by waiting until the ring has completed our requests
3074  * emitted over 20 msec ago.
3075  *
3076  * Note that if we were to use the current jiffies each time around the loop,
3077  * we wouldn't escape the function with any frames outstanding if the time to
3078  * render a frame was over 20ms.
3079  *
3080  * This should get us reasonable parallelism between CPU and GPU but also
3081  * relatively low latency when blocking on a particular request to finish.
3082  */
3083 static int
3084 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
3085 {
3086         struct drm_i915_private *dev_priv = to_i915(dev);
3087         struct drm_i915_file_private *file_priv = file->driver_priv;
3088         unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
3089         struct i915_request *request, *target = NULL;
3090         long ret;
3091
3092         /* ABI: return -EIO if already wedged */
3093         ret = i915_terminally_wedged(dev_priv);
3094         if (ret)
3095                 return ret;
3096
3097         spin_lock(&file_priv->mm.lock);
3098         list_for_each_entry(request, &file_priv->mm.request_list, client_link) {
3099                 if (time_after_eq(request->emitted_jiffies, recent_enough))
3100                         break;
3101
3102                 if (target) {
3103                         list_del(&target->client_link);
3104                         target->file_priv = NULL;
3105                 }
3106
3107                 target = request;
3108         }
3109         if (target)
3110                 i915_request_get(target);
3111         spin_unlock(&file_priv->mm.lock);
3112
3113         if (target == NULL)
3114                 return 0;
3115
3116         ret = i915_request_wait(target,
3117                                 I915_WAIT_INTERRUPTIBLE,
3118                                 MAX_SCHEDULE_TIMEOUT);
3119         i915_request_put(target);
3120
3121         return ret < 0 ? ret : 0;
3122 }
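
/*
 * Illustrative userspace sketch (not part of the driver): the throttle ioctl
 * exposed below via i915_gem_throttle_ioctl() takes no argument; callers
 * simply block until their requests older than the ~20ms window described
 * above have completed. The helper name and DRM fd are hypothetical;
 * <sys/ioctl.h> and <errno.h> are assumed.
 */
static int example_throttle(int drm_fd)
{
        if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_THROTTLE))
                return -errno; /* e.g. EIO once the GPU is terminally wedged */

        return 0;
}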
3123
3124 struct i915_vma *
3125 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
3126                          const struct i915_ggtt_view *view,
3127                          u64 size,
3128                          u64 alignment,
3129                          u64 flags)
3130 {
3131         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
3132         struct i915_address_space *vm = &dev_priv->ggtt.vm;
3133         struct i915_vma *vma;
3134         int ret;
3135
3136         lockdep_assert_held(&obj->base.dev->struct_mutex);
3137
3138         if (flags & PIN_MAPPABLE &&
3139             (!view || view->type == I915_GGTT_VIEW_NORMAL)) {
3140                 /* If the required space is larger than the available
3141                  * aperture, we will not be able to find a slot for the
3142                  * object and unbinding the object now will be in
3143                  * vain. Worse, doing so may cause us to ping-pong
3144                  * the object in and out of the Global GTT and
3145                  * waste a lot of cycles under the mutex.
3146                  */
3147                 if (obj->base.size > dev_priv->ggtt.mappable_end)
3148                         return ERR_PTR(-E2BIG);
3149
3150                 /* If NONBLOCK is set the caller is optimistically
3151                  * trying to cache the full object within the mappable
3152                  * aperture, and *must* have a fallback in place for
3153                  * situations where we cannot bind the object. We
3154                  * can be a little more lax here and use the fallback
3155                  * more often to avoid costly migrations of ourselves
3156                  * and other objects within the aperture.
3157                  *
3158                  * Half-the-aperture is used as a simple heuristic.
3159                  * More interesting would be to search for a free
3160                  * block prior to making the commitment to unbind.
3161                  * That caters for the self-harm case, and with a
3162                  * little more heuristics (e.g. NOFAULT, NOEVICT)
3163                  * we could try to minimise harm to others.
3164                  */
3165                 if (flags & PIN_NONBLOCK &&
3166                     obj->base.size > dev_priv->ggtt.mappable_end / 2)
3167                         return ERR_PTR(-ENOSPC);
3168         }
3169
3170         vma = i915_vma_instance(obj, vm, view);
3171         if (IS_ERR(vma))
3172                 return vma;
3173
3174         if (i915_vma_misplaced(vma, size, alignment, flags)) {
3175                 if (flags & PIN_NONBLOCK) {
3176                         if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))
3177                                 return ERR_PTR(-ENOSPC);
3178
3179                         if (flags & PIN_MAPPABLE &&
3180                             vma->fence_size > dev_priv->ggtt.mappable_end / 2)
3181                                 return ERR_PTR(-ENOSPC);
3182                 }
3183
3184                 WARN(i915_vma_is_pinned(vma),
3185                      "bo is already pinned in ggtt with incorrect alignment:"
3186                      " offset=%08x, req.alignment=%llx,"
3187                      " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n",
3188                      i915_ggtt_offset(vma), alignment,
3189                      !!(flags & PIN_MAPPABLE),
3190                      i915_vma_is_map_and_fenceable(vma));
3191                 ret = i915_vma_unbind(vma);
3192                 if (ret)
3193                         return ERR_PTR(ret);
3194         }
3195
3196         ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
3197         if (ret)
3198                 return ERR_PTR(ret);
3199
3200         return vma;
3201 }
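
/*
 * Illustrative sketch (not part of the driver): the usual pin/use/unpin
 * pairing around i915_gem_object_ggtt_pin() above, under struct_mutex.
 * The function name is hypothetical; a NULL view selects the normal GGTT
 * view and size/alignment of 0 accept the defaults.
 */
static int example_ggtt_bind(struct drm_i915_gem_object *obj)
{
        struct i915_vma *vma;

        lockdep_assert_held(&obj->base.dev->struct_mutex);

        vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
        if (IS_ERR(vma))
                return PTR_ERR(vma);

        /* ... use i915_ggtt_offset(vma) while the binding is pinned ... */

        i915_vma_unpin(vma);
        return 0;
}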
3202
3203 static __always_inline u32 __busy_read_flag(u8 id)
3204 {
3205         if (id == (u8)I915_ENGINE_CLASS_INVALID)
3206                 return 0xffff0000u;
3207
3208         GEM_BUG_ON(id >= 16);
3209         return 0x10000u << id;
3210 }
3211
3212 static __always_inline u32 __busy_write_id(u8 id)
3213 {
3214         /*
3215          * The uABI guarantees an active writer is also amongst the read
3216          * engines. This would be true if we accessed the activity tracking
3217          * under the lock, but as we perform the lookup of the object and
3218          * its activity locklessly we can not guarantee that the last_write
3219          * being active implies that we have set the same engine flag from
3220          * last_read - hence we always set both read and write busy for
3221          * last_write.
3222          */
3223         if (id == (u8)I915_ENGINE_CLASS_INVALID)
3224                 return 0xffffffffu;
3225
3226         return (id + 1) | __busy_read_flag(id);
3227 }
3228
3229 static __always_inline unsigned int
3230 __busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u8 id))
3231 {
3232         const struct i915_request *rq;
3233
3234         /*
3235          * We have to check the current hw status of the fence as the uABI
3236          * guarantees forward progress. We could rely on the idle worker
3237          * to eventually flush us, but to minimise latency just ask the
3238          * hardware.
3239          *
3240          * Note we only report on the status of native fences.
3241          */
3242         if (!dma_fence_is_i915(fence))
3243                 return 0;
3244
3245         /* opencode to_request() in order to avoid const warnings */
3246         rq = container_of(fence, const struct i915_request, fence);
3247         if (i915_request_completed(rq))
3248                 return 0;
3249
3250         /* Beware type-expansion follies! */
3251         BUILD_BUG_ON(!typecheck(u8, rq->engine->uabi_class));
3252         return flag(rq->engine->uabi_class);
3253 }
3254
3255 static __always_inline unsigned int
3256 busy_check_reader(const struct dma_fence *fence)
3257 {
3258         return __busy_set_if_active(fence, __busy_read_flag);
3259 }
3260
3261 static __always_inline unsigned int
3262 busy_check_writer(const struct dma_fence *fence)
3263 {
3264         if (!fence)
3265                 return 0;
3266
3267         return __busy_set_if_active(fence, __busy_write_id);
3268 }
3269
3270 int
3271 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
3272                     struct drm_file *file)
3273 {
3274         struct drm_i915_gem_busy *args = data;
3275         struct drm_i915_gem_object *obj;
3276         struct reservation_object_list *list;
3277         unsigned int seq;
3278         int err;
3279
3280         err = -ENOENT;
3281         rcu_read_lock();
3282         obj = i915_gem_object_lookup_rcu(file, args->handle);
3283         if (!obj)
3284                 goto out;
3285
3286         /*
3287          * A discrepancy here is that we do not report the status of
3288          * non-i915 fences, i.e. even though we may report the object as idle,
3289          * a call to set-domain may still stall waiting for foreign rendering.
3290          * This also means that wait-ioctl may report an object as busy,
3291          * where busy-ioctl considers it idle.
3292          *
3293          * We trade the ability to warn of foreign fences to report on which
3294          * i915 engines are active for the object.
3295          *
3296          * Alternatively, we can trade that extra information on read/write
3297          * activity with
3298          *      args->busy =
3299          *              !reservation_object_test_signaled_rcu(obj->resv, true);
3300          * to report the overall busyness. This is what the wait-ioctl does.
3301          *
3302          */
3303 retry:
3304         seq = raw_read_seqcount(&obj->resv->seq);
3305
3306         /* Translate the exclusive fence to the READ *and* WRITE engine */
3307         args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl));
3308
3309         /* Translate shared fences to READ set of engines */
3310         list = rcu_dereference(obj->resv->fence);
3311         if (list) {
3312                 unsigned int shared_count = list->shared_count, i;
3313
3314                 for (i = 0; i < shared_count; ++i) {
3315                         struct dma_fence *fence =
3316                                 rcu_dereference(list->shared[i]);
3317
3318                         args->busy |= busy_check_reader(fence);
3319                 }
3320         }
3321
3322         if (args->busy && read_seqcount_retry(&obj->resv->seq, seq))
3323                 goto retry;
3324
3325         err = 0;
3326 out:
3327         rcu_read_unlock();
3328         return err;
3329 }
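
/*
 * Illustrative userspace sketch (not part of the driver): decoding the
 * result of the busy ioctl above. Per __busy_read_flag()/__busy_write_id(),
 * the low 16 bits report the active writer as (engine class + 1) and the
 * high 16 bits carry one bit per engine class with an active reader. The
 * helper name, DRM fd and handle are hypothetical; <stdio.h>, <sys/ioctl.h>,
 * <errno.h> and include/uapi/drm/i915_drm.h are assumed.
 */
static int example_query_busy(int drm_fd, __u32 handle)
{
        struct drm_i915_gem_busy arg = { .handle = handle };

        if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_BUSY, &arg))
                return -errno;

        if (!arg.busy)
                return 0; /* idle, as far as i915 fences are concerned */

        if (arg.busy & 0xffff)
                printf("writer: engine class %u\n", (arg.busy & 0xffff) - 1);

        /* e.g. bit (16 + I915_ENGINE_CLASS_RENDER) => being read by render */
        printf("readers: class mask %#x\n", arg.busy >> 16);

        return 1;
}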
3330
3331 int
3332 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
3333                         struct drm_file *file_priv)
3334 {
3335         return i915_gem_ring_throttle(dev, file_priv);
3336 }
3337
3338 int
3339 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
3340                        struct drm_file *file_priv)
3341 {
3342         struct drm_i915_private *dev_priv = to_i915(dev);
3343         struct drm_i915_gem_madvise *args = data;
3344         struct drm_i915_gem_object *obj;
3345         int err;
3346
3347         switch (args->madv) {
3348         case I915_MADV_DONTNEED:
3349         case I915_MADV_WILLNEED:
3350             break;
3351         default:
3352             return -EINVAL;
3353         }
3354
3355         obj = i915_gem_object_lookup(file_priv, args->handle);
3356         if (!obj)
3357                 return -ENOENT;
3358
3359         err = mutex_lock_interruptible(&obj->mm.lock);
3360         if (err)
3361                 goto out;
3362
3363         if (i915_gem_object_has_pages(obj) &&
3364             i915_gem_object_is_tiled(obj) &&
3365             dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
3366                 if (obj->mm.madv == I915_MADV_WILLNEED) {
3367                         GEM_BUG_ON(!obj->mm.quirked);
3368                         __i915_gem_object_unpin_pages(obj);
3369                         obj->mm.quirked = false;
3370                 }
3371                 if (args->madv == I915_MADV_WILLNEED) {
3372                         GEM_BUG_ON(obj->mm.quirked);
3373                         __i915_gem_object_pin_pages(obj);
3374                         obj->mm.quirked = true;
3375                 }
3376         }
3377
3378         if (obj->mm.madv != __I915_MADV_PURGED)
3379                 obj->mm.madv = args->madv;
3380
3381         /* if the object is no longer attached, discard its backing storage */
3382         if (obj->mm.madv == I915_MADV_DONTNEED &&
3383             !i915_gem_object_has_pages(obj))
3384                 __i915_gem_object_truncate(obj);
3385
3386         args->retained = obj->mm.madv != __I915_MADV_PURGED;
3387         mutex_unlock(&obj->mm.lock);
3388
3389 out:
3390         i915_gem_object_put(obj);
3391         return err;
3392 }
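
/*
 * Illustrative userspace sketch (not part of the driver): the usual madvise
 * pattern for a userspace buffer cache. Buffers are marked DONTNEED while
 * cached and WILLNEED when reused; if "retained" comes back 0 the backing
 * storage was reaped in between and the contents must be regenerated. The
 * helper name, DRM fd and handle are hypothetical; <sys/ioctl.h>, <errno.h>
 * and include/uapi/drm/i915_drm.h are assumed.
 */
static int example_madvise(int drm_fd, __u32 handle, __u32 madv)
{
        struct drm_i915_gem_madvise arg = {
                .handle = handle,
                .madv = madv, /* I915_MADV_WILLNEED or I915_MADV_DONTNEED */
        };

        if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_MADVISE, &arg))
                return -errno;

        return arg.retained; /* 0: purged, caller must repopulate the buffer */
}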
3393
3394 void i915_gem_sanitize(struct drm_i915_private *i915)
3395 {
3396         intel_wakeref_t wakeref;
3397
3398         GEM_TRACE("\n");
3399
3400         wakeref = intel_runtime_pm_get(i915);
3401         intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
3402
3403         /*
3404          * As we have just resumed the machine and woken the device up from
3405          * deep PCI sleep (presumably D3_cold), assume the HW has been reset
3406          * back to defaults, recovering from whatever wedged state we left it
3407          * in and so worth trying to use the device once more.
3408          */
3409         if (i915_terminally_wedged(i915))
3410                 i915_gem_unset_wedged(i915);
3411
3412         /*
3413          * If we inherit context state from the BIOS or earlier occupants
3414          * of the GPU, the GPU may be in an inconsistent state when we
3415          * try to take over. The only way to remove the earlier state
3416          * is by resetting. However, resetting on earlier gen is tricky as
3417          * it may impact the display and we are uncertain about the stability
3418          * of the reset, so this could be applied to even earlier gen.
3419          */
3420         intel_gt_sanitize(i915, false);
3421
3422         intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
3423         intel_runtime_pm_put(i915, wakeref);
3424
3425         mutex_lock(&i915->drm.struct_mutex);
3426         i915_gem_contexts_lost(i915);
3427         mutex_unlock(&i915->drm.struct_mutex);
3428 }
3429
3430 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv)
3431 {
3432         if (INTEL_GEN(dev_priv) < 5 ||
3433             dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
3434                 return;
3435
3436         I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
3437                                  DISP_TILE_SURFACE_SWIZZLING);
3438
3439         if (IS_GEN(dev_priv, 5))
3440                 return;
3441
3442         I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
3443         if (IS_GEN(dev_priv, 6))
3444                 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
3445         else if (IS_GEN(dev_priv, 7))
3446                 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
3447         else if (IS_GEN(dev_priv, 8))
3448                 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
3449         else
3450                 BUG();
3451 }
3452
3453 static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base)
3454 {
3455         I915_WRITE(RING_CTL(base), 0);
3456         I915_WRITE(RING_HEAD(base), 0);
3457         I915_WRITE(RING_TAIL(base), 0);
3458         I915_WRITE(RING_START(base), 0);
3459 }
3460
3461 static void init_unused_rings(struct drm_i915_private *dev_priv)
3462 {
3463         if (IS_I830(dev_priv)) {
3464                 init_unused_ring(dev_priv, PRB1_BASE);
3465                 init_unused_ring(dev_priv, SRB0_BASE);
3466                 init_unused_ring(dev_priv, SRB1_BASE);
3467                 init_unused_ring(dev_priv, SRB2_BASE);
3468                 init_unused_ring(dev_priv, SRB3_BASE);
3469         } else if (IS_GEN(dev_priv, 2)) {
3470                 init_unused_ring(dev_priv, SRB0_BASE);
3471                 init_unused_ring(dev_priv, SRB1_BASE);
3472         } else if (IS_GEN(dev_priv, 3)) {
3473                 init_unused_ring(dev_priv, PRB1_BASE);
3474                 init_unused_ring(dev_priv, PRB2_BASE);
3475         }
3476 }
3477
3478 int i915_gem_init_hw(struct drm_i915_private *dev_priv)
3479 {
3480         int ret;
3481
3482         dev_priv->gt.last_init_time = ktime_get();
3483
3484         /* Double layer security blanket, see i915_gem_init() */
3485         intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
3486
3487         if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9)
3488                 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
3489
3490         if (IS_HASWELL(dev_priv))
3491                 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ?
3492                            LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
3493
3494         /* Apply the GT workarounds... */
3495         intel_gt_apply_workarounds(dev_priv);
3496         /* ...and determine whether they are sticking. */
3497         intel_gt_verify_workarounds(dev_priv, "init");
3498
3499         i915_gem_init_swizzling(dev_priv);
3500
3501         /*
3502          * At least 830 can leave some of the unused rings
3503          * "active" (ie. head != tail) after resume which
3504          * will prevent c3 entry. Make sure all unused rings
3505          * are totally idle.
3506          */
3507         init_unused_rings(dev_priv);
3508
3509         BUG_ON(!dev_priv->kernel_context);
3510         ret = i915_terminally_wedged(dev_priv);
3511         if (ret)
3512                 goto out;
3513
3514         ret = i915_ppgtt_init_hw(dev_priv);
3515         if (ret) {
3516                 DRM_ERROR("Enabling PPGTT failed (%d)\n", ret);
3517                 goto out;
3518         }
3519
3520         ret = intel_wopcm_init_hw(&dev_priv->wopcm);
3521         if (ret) {
3522                 DRM_ERROR("Enabling WOPCM failed (%d)\n", ret);
3523                 goto out;
3524         }
3525
3526         /* We can't enable contexts until all firmware is loaded */
3527         ret = intel_uc_init_hw(dev_priv);
3528         if (ret) {
3529                 DRM_ERROR("Enabling uc failed (%d)\n", ret);
3530                 goto out;
3531         }
3532
3533         intel_mocs_init_l3cc_table(dev_priv);
3534
3535         /* Only when the HW is re-initialised, can we replay the requests */
3536         ret = intel_engines_resume(dev_priv);
3537         if (ret)
3538                 goto cleanup_uc;
3539
3540         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
3541
3542         intel_engines_set_scheduler_caps(dev_priv);
3543         return 0;
3544
3545 cleanup_uc:
3546         intel_uc_fini_hw(dev_priv);
3547 out:
3548         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
3549
3550         return ret;
3551 }
3552
3553 static int __intel_engines_record_defaults(struct drm_i915_private *i915)
3554 {
3555         struct intel_engine_cs *engine;
3556         struct i915_gem_context *ctx;
3557         struct i915_gem_engines *e;
3558         enum intel_engine_id id;
3559         int err = 0;
3560
3561         /*
3562          * As we reset the gpu during very early sanitisation, the current
3563          * register state on the GPU should reflect its default values.
3564          * We load a context onto the hw (with restore-inhibit), then switch
3565          * over to a second context to save that default register state. We
3566          * can then prime every new context with that state so they all start
3567          * from the same default HW values.
3568          */
3569
3570         ctx = i915_gem_context_create_kernel(i915, 0);
3571         if (IS_ERR(ctx))
3572                 return PTR_ERR(ctx);
3573
3574         e = i915_gem_context_lock_engines(ctx);
3575
3576         for_each_engine(engine, i915, id) {
3577                 struct intel_context *ce = e->engines[id];
3578                 struct i915_request *rq;
3579
3580                 rq = intel_context_create_request(ce);
3581                 if (IS_ERR(rq)) {
3582                         err = PTR_ERR(rq);
3583                         goto err_active;
3584                 }
3585
3586                 err = 0;
3587                 if (rq->engine->init_context)
3588                         err = rq->engine->init_context(rq);
3589
3590                 i915_request_add(rq);
3591                 if (err)
3592                         goto err_active;
3593         }
3594
3595         /* Flush the default context image to memory, and enable powersaving. */
3596         if (!i915_gem_load_power_context(i915)) {
3597                 err = -EIO;
3598                 goto err_active;
3599         }
3600
3601         for_each_engine(engine, i915, id) {
3602                 struct intel_context *ce = e->engines[id];
3603                 struct i915_vma *state = ce->state;
3604                 void *vaddr;
3605
3606                 if (!state)
3607                         continue;
3608
3609                 GEM_BUG_ON(intel_context_is_pinned(ce));
3610
3611                 /*
3612                  * As we will hold a reference to the logical state, it will
3613                  * not be torn down with the context, and importantly the
3614                  * object will hold onto its vma (making it possible for a
3615                  * stray GTT write to corrupt our defaults). Unmap the vma
3616                  * from the GTT to prevent such accidents and reclaim the
3617                  * space.
3618                  */
3619                 err = i915_vma_unbind(state);
3620                 if (err)
3621                         goto err_active;
3622
3623                 err = i915_gem_object_set_to_cpu_domain(state->obj, false);
3624                 if (err)
3625                         goto err_active;
3626
3627                 engine->default_state = i915_gem_object_get(state->obj);
3628                 i915_gem_object_set_cache_coherency(engine->default_state,
3629                                                     I915_CACHE_LLC);
3630
3631                 /* Check we can acquire the image of the context state */
3632                 vaddr = i915_gem_object_pin_map(engine->default_state,
3633                                                 I915_MAP_FORCE_WB);
3634                 if (IS_ERR(vaddr)) {
3635                         err = PTR_ERR(vaddr);
3636                         goto err_active;
3637                 }
3638
3639                 i915_gem_object_unpin_map(engine->default_state);
3640         }
3641
3642         if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
3643                 unsigned int found = intel_engines_has_context_isolation(i915);
3644
3645                 /*
3646                  * Make sure that classes with multiple engine instances all
3647                  * share the same basic configuration.
3648                  */
3649                 for_each_engine(engine, i915, id) {
3650                         unsigned int bit = BIT(engine->uabi_class);
3651                         unsigned int expected = engine->default_state ? bit : 0;
3652
3653                         if ((found & bit) != expected) {
3654                                 DRM_ERROR("mismatching default context state for class %d on engine %s\n",
3655                                           engine->uabi_class, engine->name);
3656                         }
3657                 }
3658         }
3659
3660 out_ctx:
3661         i915_gem_context_unlock_engines(ctx);
3662         i915_gem_context_set_closed(ctx);
3663         i915_gem_context_put(ctx);
3664         return err;
3665
3666 err_active:
3667         /*
3668          * If we have to abandon now, we expect the engines to be idle
3669          * and ready to be torn-down. The quickest way we can accomplish
3670          * this is by declaring ourselves wedged.
3671          */
3672         i915_gem_set_wedged(i915);
3673         goto out_ctx;
3674 }
3675
3676 static int
3677 i915_gem_init_scratch(struct drm_i915_private *i915, unsigned int size)
3678 {
3679         struct drm_i915_gem_object *obj;
3680         struct i915_vma *vma;
3681         int ret;
3682
3683         obj = i915_gem_object_create_stolen(i915, size);
3684         if (!obj)
3685                 obj = i915_gem_object_create_internal(i915, size);
3686         if (IS_ERR(obj)) {
3687                 DRM_ERROR("Failed to allocate scratch page\n");
3688                 return PTR_ERR(obj);
3689         }
3690
3691         vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
3692         if (IS_ERR(vma)) {
3693                 ret = PTR_ERR(vma);
3694                 goto err_unref;
3695         }
3696
3697         ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
3698         if (ret)
3699                 goto err_unref;
3700
3701         i915->gt.scratch = vma;
3702         return 0;
3703
3704 err_unref:
3705         i915_gem_object_put(obj);
3706         return ret;
3707 }
3708
3709 static void i915_gem_fini_scratch(struct drm_i915_private *i915)
3710 {
3711         i915_vma_unpin_and_release(&i915->gt.scratch, 0);
3712 }
3713
3714 static int intel_engines_verify_workarounds(struct drm_i915_private *i915)
3715 {
3716         struct intel_engine_cs *engine;
3717         enum intel_engine_id id;
3718         int err = 0;
3719
3720         if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
3721                 return 0;
3722
3723         for_each_engine(engine, i915, id) {
3724                 if (intel_engine_verify_workarounds(engine, "load"))
3725                         err = -EIO;
3726         }
3727
3728         return err;
3729 }
3730
3731 int i915_gem_init(struct drm_i915_private *dev_priv)
3732 {
3733         int ret;
3734
3735         /* We need to fall back to 4K pages if host doesn't support huge gtt. */
3736         if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv))
3737                 mkwrite_device_info(dev_priv)->page_sizes =
3738                         I915_GTT_PAGE_SIZE_4K;
3739
3740         dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1);
3741
3742         i915_timelines_init(dev_priv);
3743
3744         ret = i915_gem_init_userptr(dev_priv);
3745         if (ret)
3746                 return ret;
3747
3748         ret = intel_uc_init_misc(dev_priv);
3749         if (ret)
3750                 return ret;
3751
3752         ret = intel_wopcm_init(&dev_priv->wopcm);
3753         if (ret)
3754                 goto err_uc_misc;
3755
3756         /* This is just a security blanket to placate dragons.
3757          * On some systems, we very sporadically observe that the first TLBs
3758          * used by the CS may be stale, despite us poking the TLB reset. If
3759          * we hold the forcewake during initialisation these problems
3760          * just magically go away.
3761          */
3762         mutex_lock(&dev_priv->drm.struct_mutex);
3763         intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
3764
3765         ret = i915_gem_init_ggtt(dev_priv);
3766         if (ret) {
3767                 GEM_BUG_ON(ret == -EIO);
3768                 goto err_unlock;
3769         }
3770
3771         ret = i915_gem_init_scratch(dev_priv,
3772                                     IS_GEN(dev_priv, 2) ? SZ_256K : PAGE_SIZE);
3773         if (ret) {
3774                 GEM_BUG_ON(ret == -EIO);
3775                 goto err_ggtt;
3776         }
3777
3778         ret = intel_engines_setup(dev_priv);
3779         if (ret) {
3780                 GEM_BUG_ON(ret == -EIO);
3781                 goto err_unlock;
3782         }
3783
3784         ret = i915_gem_contexts_init(dev_priv);
3785         if (ret) {
3786                 GEM_BUG_ON(ret == -EIO);
3787                 goto err_scratch;
3788         }
3789
3790         ret = intel_engines_init(dev_priv);
3791         if (ret) {
3792                 GEM_BUG_ON(ret == -EIO);
3793                 goto err_context;
3794         }
3795
3796         intel_init_gt_powersave(dev_priv);
3797
3798         ret = intel_uc_init(dev_priv);
3799         if (ret)
3800                 goto err_pm;
3801
3802         ret = i915_gem_init_hw(dev_priv);
3803         if (ret)
3804                 goto err_uc_init;
3805
3806         /*
3807          * Despite its name, intel_init_clock_gating applies display clock
3808          * gating workarounds, GT mmio workarounds and the occasional
3809          * GT power context workaround. Worse, sometimes it includes a context
3810          * register workaround which we need to apply before we record the
3811          * default HW state for all contexts.
3812          *
3813          * FIXME: break up the workarounds and apply them at the right time!
3814          */
3815         intel_init_clock_gating(dev_priv);
3816
3817         ret = intel_engines_verify_workarounds(dev_priv);
3818         if (ret)
3819                 goto err_init_hw;
3820
3821         ret = __intel_engines_record_defaults(dev_priv);
3822         if (ret)
3823                 goto err_init_hw;
3824
3825         if (i915_inject_load_failure()) {
3826                 ret = -ENODEV;
3827                 goto err_init_hw;
3828         }
3829
3830         if (i915_inject_load_failure()) {
3831                 ret = -EIO;
3832                 goto err_init_hw;
3833         }
3834
3835         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
3836         mutex_unlock(&dev_priv->drm.struct_mutex);
3837
3838         return 0;
3839
3840         /*
3841          * Unwinding is complicated by the fact that we want to handle -EIO to
3842          * mean disable GPU submission but keep KMS alive. We want to mark the
3843          * HW as irreversibly wedged, but keep enough state around that the
3844          * driver doesn't explode during runtime.
3845          */
3846 err_init_hw:
3847         mutex_unlock(&dev_priv->drm.struct_mutex);
3848
3849         i915_gem_set_wedged(dev_priv);
3850         i915_gem_suspend(dev_priv);
3851         i915_gem_suspend_late(dev_priv);
3852
3853         i915_gem_drain_workqueue(dev_priv);
3854
3855         mutex_lock(&dev_priv->drm.struct_mutex);
3856         intel_uc_fini_hw(dev_priv);
3857 err_uc_init:
3858         intel_uc_fini(dev_priv);
3859 err_pm:
3860         if (ret != -EIO) {
3861                 intel_cleanup_gt_powersave(dev_priv);
3862                 intel_engines_cleanup(dev_priv);
3863         }
3864 err_context:
3865         if (ret != -EIO)
3866                 i915_gem_contexts_fini(dev_priv);
3867 err_scratch:
3868         i915_gem_fini_scratch(dev_priv);
3869 err_ggtt:
3870 err_unlock:
3871         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
3872         mutex_unlock(&dev_priv->drm.struct_mutex);
3873
3874 err_uc_misc:
3875         intel_uc_fini_misc(dev_priv);
3876
3877         if (ret != -EIO) {
3878                 i915_gem_cleanup_userptr(dev_priv);
3879                 i915_timelines_fini(dev_priv);
3880         }
3881
3882         if (ret == -EIO) {
3883                 mutex_lock(&dev_priv->drm.struct_mutex);
3884
3885                 /*
3886                  * Allow engine initialisation to fail by marking the GPU as
3887                  * wedged. But we only want to do this when the GPU is angry;
3888                  * for all other failures, such as an allocation failure, bail.
3889                  */
3890                 if (!i915_reset_failed(dev_priv)) {
3891                         i915_load_error(dev_priv,
3892                                         "Failed to initialize GPU, declaring it wedged!\n");
3893                         i915_gem_set_wedged(dev_priv);
3894                 }
3895
3896                 /* Minimal basic recovery for KMS */
3897                 ret = i915_ggtt_enable_hw(dev_priv);
3898                 i915_gem_restore_gtt_mappings(dev_priv);
3899                 i915_gem_restore_fences(dev_priv);
3900                 intel_init_clock_gating(dev_priv);
3901
3902                 mutex_unlock(&dev_priv->drm.struct_mutex);
3903         }
3904
3905         i915_gem_drain_freed_objects(dev_priv);
3906         return ret;
3907 }
3908
3909 void i915_gem_fini(struct drm_i915_private *dev_priv)
3910 {
3911         GEM_BUG_ON(dev_priv->gt.awake);
3912
3913         intel_wakeref_auto_fini(&dev_priv->mm.userfault_wakeref);
3914
3915         i915_gem_suspend_late(dev_priv);
3916         intel_disable_gt_powersave(dev_priv);
3917
3918         /* Flush any outstanding unpin_work. */
3919         i915_gem_drain_workqueue(dev_priv);
3920
3921         mutex_lock(&dev_priv->drm.struct_mutex);
3922         intel_uc_fini_hw(dev_priv);
3923         intel_uc_fini(dev_priv);
3924         intel_engines_cleanup(dev_priv);
3925         i915_gem_contexts_fini(dev_priv);
3926         i915_gem_fini_scratch(dev_priv);
3927         mutex_unlock(&dev_priv->drm.struct_mutex);
3928
3929         intel_wa_list_free(&dev_priv->gt_wa_list);
3930
3931         intel_cleanup_gt_powersave(dev_priv);
3932
3933         intel_uc_fini_misc(dev_priv);
3934         i915_gem_cleanup_userptr(dev_priv);
3935         i915_timelines_fini(dev_priv);
3936
3937         i915_gem_drain_freed_objects(dev_priv);
3938
3939         WARN_ON(!list_empty(&dev_priv->contexts.list));
3940 }
3941
3942 void i915_gem_init_mmio(struct drm_i915_private *i915)
3943 {
3944         i915_gem_sanitize(i915);
3945 }
3946
3947 void
3948 i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
3949 {
3950         int i;
3951
3952         if (INTEL_GEN(dev_priv) >= 7 && !IS_VALLEYVIEW(dev_priv) &&
3953             !IS_CHERRYVIEW(dev_priv))
3954                 dev_priv->num_fence_regs = 32;
3955         else if (INTEL_GEN(dev_priv) >= 4 ||
3956                  IS_I945G(dev_priv) || IS_I945GM(dev_priv) ||
3957                  IS_G33(dev_priv) || IS_PINEVIEW(dev_priv))
3958                 dev_priv->num_fence_regs = 16;
3959         else
3960                 dev_priv->num_fence_regs = 8;
3961
3962         if (intel_vgpu_active(dev_priv))
3963                 dev_priv->num_fence_regs =
3964                                 I915_READ(vgtif_reg(avail_rs.fence_num));
3965
3966         /* Initialize fence registers to zero */
3967         for (i = 0; i < dev_priv->num_fence_regs; i++) {
3968                 struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
3969
3970                 fence->i915 = dev_priv;
3971                 fence->id = i;
3972                 list_add_tail(&fence->link, &dev_priv->mm.fence_list);
3973         }
3974         i915_gem_restore_fences(dev_priv);
3975
3976         i915_gem_detect_bit_6_swizzle(dev_priv);
3977 }
3978
3979 static void i915_gem_init__mm(struct drm_i915_private *i915)
3980 {
3981         spin_lock_init(&i915->mm.object_stat_lock);
3982         spin_lock_init(&i915->mm.obj_lock);
3983         spin_lock_init(&i915->mm.free_lock);
3984
3985         init_llist_head(&i915->mm.free_list);
3986
3987         INIT_LIST_HEAD(&i915->mm.unbound_list);
3988         INIT_LIST_HEAD(&i915->mm.bound_list);
3989         INIT_LIST_HEAD(&i915->mm.fence_list);
3990
3991         INIT_LIST_HEAD(&i915->mm.userfault_list);
3992         intel_wakeref_auto_init(&i915->mm.userfault_wakeref, i915);
3993
3994         i915_gem_init__objects(i915);
3995 }
3996
3997 int i915_gem_init_early(struct drm_i915_private *dev_priv)
3998 {
3999         int err;
4000
4001         intel_gt_pm_init(dev_priv);
4002
4003         INIT_LIST_HEAD(&dev_priv->gt.active_rings);
4004         INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
4005
4006         i915_gem_init__mm(dev_priv);
4007         i915_gem_init__pm(dev_priv);
4008
4009         init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
4010         init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
4011         mutex_init(&dev_priv->gpu_error.wedge_mutex);
4012         init_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu);
4013
4014         atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
4015
4016         spin_lock_init(&dev_priv->fb_tracking.lock);
4017
4018         err = i915_gemfs_init(dev_priv);
4019         if (err)
4020                 DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err);
4021
4022         return 0;
4023 }
4024
4025 void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
4026 {
4027         i915_gem_drain_freed_objects(dev_priv);
4028         GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
4029         GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
4030         WARN_ON(dev_priv->mm.object_count);
4031
4032         cleanup_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu);
4033
4034         i915_gemfs_fini(dev_priv);
4035 }
4036
4037 int i915_gem_freeze(struct drm_i915_private *dev_priv)
4038 {
4039         /* Discard all purgeable objects, let userspace recover those as
4040          * required after resuming.
4041          */
4042         i915_gem_shrink_all(dev_priv);
4043
4044         return 0;
4045 }
4046
4047 int i915_gem_freeze_late(struct drm_i915_private *i915)
4048 {
4049         struct drm_i915_gem_object *obj;
4050         struct list_head *phases[] = {
4051                 &i915->mm.unbound_list,
4052                 &i915->mm.bound_list,
4053                 NULL
4054         }, **phase;
4055
4056         /*
4057          * Called just before we write the hibernation image.
4058          *
4059          * We need to update the domain tracking to reflect that the CPU
4060          * will be accessing all the pages to create and restore from the
4061          * hibernation, and so upon restoration those pages will be in the
4062          * CPU domain.
4063          *
4064          * To make sure the hibernation image contains the latest state,
4065          * we update that state just before writing out the image.
4066          *
4067          * To try and reduce the hibernation image, we manually shrink
4068          * the objects as well, see i915_gem_freeze()
4069          */
4070
4071         i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_UNBOUND);
4072         i915_gem_drain_freed_objects(i915);
4073
4074         mutex_lock(&i915->drm.struct_mutex);
4075         for (phase = phases; *phase; phase++) {
4076                 list_for_each_entry(obj, *phase, mm.link)
4077                         WARN_ON(i915_gem_object_set_to_cpu_domain(obj, true));
4078         }
4079         mutex_unlock(&i915->drm.struct_mutex);
4080
4081         return 0;
4082 }
4083
4084 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
4085 {
4086         struct drm_i915_file_private *file_priv = file->driver_priv;
4087         struct i915_request *request;
4088
4089         /* Clean up our request list when the client is going away, so that
4090          * later retire_requests won't dereference our soon-to-be-gone
4091          * file_priv.
4092          */
4093         spin_lock(&file_priv->mm.lock);
4094         list_for_each_entry(request, &file_priv->mm.request_list, client_link)
4095                 request->file_priv = NULL;
4096         spin_unlock(&file_priv->mm.lock);
4097 }
4098
4099 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
4100 {
4101         struct drm_i915_file_private *file_priv;
4102         int ret;
4103
4104         DRM_DEBUG("\n");
4105
4106         file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
4107         if (!file_priv)
4108                 return -ENOMEM;
4109
4110         file->driver_priv = file_priv;
4111         file_priv->dev_priv = i915;
4112         file_priv->file = file;
4113
4114         spin_lock_init(&file_priv->mm.lock);
4115         INIT_LIST_HEAD(&file_priv->mm.request_list);
4116
4117         file_priv->bsd_engine = -1;
4118         file_priv->hang_timestamp = jiffies;
4119
4120         ret = i915_gem_context_open(i915, file);
4121         if (ret)
4122                 kfree(file_priv);
4123
4124         return ret;
4125 }
4126
4127 /**
4128  * i915_gem_track_fb - update frontbuffer tracking
4129  * @old: current GEM buffer for the frontbuffer slots
4130  * @new: new GEM buffer for the frontbuffer slots
4131  * @frontbuffer_bits: bitmask of frontbuffer slots
4132  *
4133  * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
4134  * from @old and setting them in @new. Both @old and @new can be NULL.
4135  */
4136 void i915_gem_track_fb(struct drm_i915_gem_object *old,
4137                        struct drm_i915_gem_object *new,
4138                        unsigned frontbuffer_bits)
4139 {
4140         /* Control of individual bits within the mask is guarded by
4141          * the owning plane->mutex, i.e. we can never see concurrent
4142          * manipulation of individual bits. But since the bitfield as a whole
4143          * is updated using RMW, we need to use atomics in order to update
4144          * the bits.
4145          */
4146         BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
4147                      BITS_PER_TYPE(atomic_t));
4148
4149         if (old) {
4150                 WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
4151                 atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
4152         }
4153
4154         if (new) {
4155                 WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
4156                 atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
4157         }
4158 }
4159
4160 struct scatterlist *
4161 i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
4162                        unsigned int n,
4163                        unsigned int *offset)
4164 {
4165         struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
4166         struct scatterlist *sg;
4167         unsigned int idx, count;
4168
4169         might_sleep();
4170         GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
4171         GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
4172
4173         /* As we iterate forward through the sg, we record each entry in a
4174          * radixtree for quick repeated (backwards) lookups. If we have seen
4175          * this index previously, we will have an entry for it.
4176          *
4177          * Initial lookup is O(N), but this is amortized to O(1) for
4178          * sequential page access (where each new request is consecutive
4179          * to the previous one). Repeated lookups are O(lg(obj->base.size)),
4180          * i.e. O(1) with a large constant!
4181          */
4182         if (n < READ_ONCE(iter->sg_idx))
4183                 goto lookup;
4184
4185         mutex_lock(&iter->lock);
4186
4187         /* We prefer to reuse the last sg so that repeated lookups of this
4188          * (or the subsequent) sg are fast - comparing against the last
4189          * sg is faster than going through the radixtree.
4190          */
4191
4192         sg = iter->sg_pos;
4193         idx = iter->sg_idx;
4194         count = __sg_page_count(sg);
4195
4196         while (idx + count <= n) {
4197                 void *entry;
4198                 unsigned long i;
4199                 int ret;
4200
4201                 /* If we cannot allocate and insert this entry, or the
4202                  * individual pages from this range, cancel updating the
4203                  * sg_idx so that on this lookup we are forced to linearly
4204                  * scan onwards, but on future lookups we will try the
4205                  * insertion again (in which case we need to be careful of
4206                  * the error return reporting that we have already inserted
4207                  * this index).
4208                  */
4209                 ret = radix_tree_insert(&iter->radix, idx, sg);
4210                 if (ret && ret != -EEXIST)
4211                         goto scan;
4212
4213                 entry = xa_mk_value(idx);
4214                 for (i = 1; i < count; i++) {
4215                         ret = radix_tree_insert(&iter->radix, idx + i, entry);
4216                         if (ret && ret != -EEXIST)
4217                                 goto scan;
4218                 }
4219
4220                 idx += count;
4221                 sg = ____sg_next(sg);
4222                 count = __sg_page_count(sg);
4223         }
4224
4225 scan:
4226         iter->sg_pos = sg;
4227         iter->sg_idx = idx;
4228
4229         mutex_unlock(&iter->lock);
4230
4231         if (unlikely(n < idx)) /* insertion completed by another thread */
4232                 goto lookup;
4233
4234         /* In case we failed to insert the entry into the radixtree, we need
4235          * to look beyond the current sg.
4236          */
4237         while (idx + count <= n) {
4238                 idx += count;
4239                 sg = ____sg_next(sg);
4240                 count = __sg_page_count(sg);
4241         }
4242
4243         *offset = n - idx;
4244         return sg;
4245
4246 lookup:
4247         rcu_read_lock();
4248
4249         sg = radix_tree_lookup(&iter->radix, n);
4250         GEM_BUG_ON(!sg);
4251
4252         /* If this index is in the middle of a multi-page sg entry,
4253          * the radix tree will contain a value entry that points
4254          * to the start of that range. We will return the pointer to
4255          * the base page and the offset of this page within the
4256          * sg entry's range.
4257          */
4258         *offset = 0;
4259         if (unlikely(xa_is_value(sg))) {
4260                 unsigned long base = xa_to_value(sg);
4261
4262                 sg = radix_tree_lookup(&iter->radix, base);
4263                 GEM_BUG_ON(!sg);
4264
4265                 *offset = n - base;
4266         }
4267
4268         rcu_read_unlock();
4269
4270         return sg;
4271 }
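
/*
 * Illustrative sketch (not part of the driver) of the lookup strategy used
 * by i915_gem_object_get_sg() above: keep a cached (index, position) pair
 * and walk forward from it, so sequential queries are O(1); only a backwards
 * jump forces a rescan (the driver instead consults the radix tree it filled
 * in on the way forward). Plain C over an array of chunk lengths, with
 * hypothetical names, purely to show the idea.
 */
struct example_chunk_iter {
        const unsigned int *chunk_pages;        /* pages per chunk, all > 0 */
        unsigned int cached_chunk;              /* chunk covering cached_idx */
        unsigned int cached_idx;                /* first page of cached_chunk */
};

/* Caller guarantees @page lies within the object. */
static unsigned int example_find_chunk(struct example_chunk_iter *it,
                                       unsigned int page,
                                       unsigned int *offset)
{
        if (page < it->cached_idx) {
                /* Jumping backwards: restart the linear walk. */
                it->cached_chunk = 0;
                it->cached_idx = 0;
        }

        /* Advance until the cached chunk covers the requested page. */
        while (it->cached_idx + it->chunk_pages[it->cached_chunk] <= page) {
                it->cached_idx += it->chunk_pages[it->cached_chunk];
                it->cached_chunk++;
        }

        *offset = page - it->cached_idx;
        return it->cached_chunk;
}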
4272
4273 struct page *
4274 i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
4275 {
4276         struct scatterlist *sg;
4277         unsigned int offset;
4278
4279         GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
4280
4281         sg = i915_gem_object_get_sg(obj, n, &offset);
4282         return nth_page(sg_page(sg), offset);
4283 }
4284
4285 /* Like i915_gem_object_get_page(), but mark the returned page dirty */
4286 struct page *
4287 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
4288                                unsigned int n)
4289 {
4290         struct page *page;
4291
4292         page = i915_gem_object_get_page(obj, n);
4293         if (!obj->mm.dirty)
4294                 set_page_dirty(page);
4295
4296         return page;
4297 }
4298
4299 dma_addr_t
4300 i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj,
4301                                     unsigned long n,
4302                                     unsigned int *len)
4303 {
4304         struct scatterlist *sg;
4305         unsigned int offset;
4306
4307         sg = i915_gem_object_get_sg(obj, n, &offset);
4308
4309         if (len)
4310                 *len = sg_dma_len(sg) - (offset << PAGE_SHIFT);
4311
4312         return sg_dma_address(sg) + (offset << PAGE_SHIFT);
4313 }
4314
4315 dma_addr_t
4316 i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
4317                                 unsigned long n)
4318 {
4319         return i915_gem_object_get_dma_address_len(obj, n, NULL);
4320 }
4321
4322
4323 int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
4324 {
4325         struct sg_table *pages;
4326         int err;
4327
4328         if (align > obj->base.size)
4329                 return -EINVAL;
4330
4331         if (obj->ops == &i915_gem_phys_ops)
4332                 return 0;
4333
4334         if (obj->ops != &i915_gem_shmem_ops)
4335                 return -EINVAL;
4336
4337         err = i915_gem_object_unbind(obj);
4338         if (err)
4339                 return err;
4340
4341         mutex_lock(&obj->mm.lock);
4342
4343         if (obj->mm.madv != I915_MADV_WILLNEED) {
4344                 err = -EFAULT;
4345                 goto err_unlock;
4346         }
4347
4348         if (obj->mm.quirked) {
4349                 err = -EFAULT;
4350                 goto err_unlock;
4351         }
4352
4353         if (obj->mm.mapping) {
4354                 err = -EBUSY;
4355                 goto err_unlock;
4356         }
4357
4358         pages = __i915_gem_object_unset_pages(obj);
4359
4360         obj->ops = &i915_gem_phys_ops;
4361
4362         err = ____i915_gem_object_get_pages(obj);
4363         if (err)
4364                 goto err_xfer;
4365
4366         /* Perma-pin (until release) the physical set of pages */
4367         __i915_gem_object_pin_pages(obj);
4368
4369         if (!IS_ERR_OR_NULL(pages))
4370                 i915_gem_shmem_ops.put_pages(obj, pages);
4371         mutex_unlock(&obj->mm.lock);
4372         return 0;
4373
4374 err_xfer:
4375         obj->ops = &i915_gem_shmem_ops;
4376         if (!IS_ERR_OR_NULL(pages)) {
4377                 unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl);
4378
4379                 __i915_gem_object_set_pages(obj, pages, sg_page_sizes);
4380         }
4381 err_unlock:
4382         mutex_unlock(&obj->mm.lock);
4383         return err;
4384 }
4385
4386 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
4387 #include "selftests/scatterlist.c"
4388 #include "selftests/mock_gem_device.c"
4389 #include "selftests/huge_gem_object.c"
4390 #include "selftests/huge_pages.c"
4391 #include "selftests/i915_gem_object.c"
4392 #include "selftests/i915_gem_coherency.c"
4393 #include "selftests/i915_gem.c"
4394 #endif