drivers/gpu/drm/i915/i915_gem.c
1 /*
2  * Copyright © 2008-2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  */
27
28 #include <drm/drm_vma_manager.h>
29 #include <drm/drm_pci.h>
30 #include <drm/i915_drm.h>
31 #include <linux/dma-fence-array.h>
32 #include <linux/kthread.h>
33 #include <linux/reservation.h>
34 #include <linux/shmem_fs.h>
35 #include <linux/slab.h>
36 #include <linux/stop_machine.h>
37 #include <linux/swap.h>
38 #include <linux/pci.h>
39 #include <linux/dma-buf.h>
40 #include <linux/mman.h>
41
42 #include "i915_drv.h"
43 #include "i915_gem_clflush.h"
44 #include "i915_gemfs.h"
45 #include "i915_globals.h"
46 #include "i915_reset.h"
47 #include "i915_trace.h"
48 #include "i915_vgpu.h"
49
50 #include "intel_drv.h"
51 #include "intel_frontbuffer.h"
52 #include "intel_mocs.h"
53 #include "intel_workarounds.h"
54
55 static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
56
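/*
 * Report whether a CPU write to this object must be followed by a
 * clflush: not if the cache is already tracked as dirty, always if the
 * object is not cache coherent for writes, and otherwise only while
 * the object is pinned for global (HW) use and so must be kept flushed.
 */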
57 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
58 {
59         if (obj->cache_dirty)
60                 return false;
61
62         if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
63                 return true;
64
65         return obj->pin_global; /* currently in use by HW, keep flushed */
66 }
67
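/*
 * Reserve a temporary node in the CPU-visible portion of the GGTT
 * (below ggtt->mappable_end) so that an object can be accessed through
 * the aperture even when we cannot pin a full GGTT vma for it.
 */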
68 static int
69 insert_mappable_node(struct i915_ggtt *ggtt,
70                      struct drm_mm_node *node, u32 size)
71 {
72         memset(node, 0, sizeof(*node));
73         return drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
74                                            size, 0, I915_COLOR_UNEVICTABLE,
75                                            0, ggtt->mappable_end,
76                                            DRM_MM_INSERT_LOW);
77 }
78
79 static void
80 remove_mappable_node(struct drm_mm_node *node)
81 {
82         drm_mm_remove_node(node);
83 }
84
85 /* some bookkeeping */
86 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
87                                   u64 size)
88 {
89         spin_lock(&dev_priv->mm.object_stat_lock);
90         dev_priv->mm.object_count++;
91         dev_priv->mm.object_memory += size;
92         spin_unlock(&dev_priv->mm.object_stat_lock);
93 }
94
95 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
96                                      u64 size)
97 {
98         spin_lock(&dev_priv->mm.object_stat_lock);
99         dev_priv->mm.object_count--;
100         dev_priv->mm.object_memory -= size;
101         spin_unlock(&dev_priv->mm.object_stat_lock);
102 }
103
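/*
 * Final step of parking the GPU, run from the idle worker once no
 * requests remain: quiesce any irq tasklets, park the engines and
 * timelines, and release the GT IRQ power domain reference that was
 * taken when the GPU was last woken in i915_gem_unpark().
 */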
104 static void __i915_gem_park(struct drm_i915_private *i915)
105 {
106         intel_wakeref_t wakeref;
107
108         GEM_TRACE("\n");
109
110         lockdep_assert_held(&i915->drm.struct_mutex);
111         GEM_BUG_ON(i915->gt.active_requests);
112         GEM_BUG_ON(!list_empty(&i915->gt.active_rings));
113
114         if (!i915->gt.awake)
115                 return;
116
117         /*
118          * Be paranoid and flush a concurrent interrupt to make sure
119          * we don't reactivate any irq tasklets after parking.
120          *
121          * FIXME: Note that even though we have waited for execlists to be idle,
122          * there may still be an in-flight interrupt even though the CSB
123          * is now empty. synchronize_irq() makes sure that a residual interrupt
124          * is completed before we continue, but it doesn't prevent the HW from
125          * raising a spurious interrupt later. To complete the shield we should
126          * coordinate disabling the CS irq with flushing the interrupts.
127          */
128         synchronize_irq(i915->drm.irq);
129
130         intel_engines_park(i915);
131         i915_timelines_park(i915);
132
133         i915_pmu_gt_parked(i915);
134         i915_vma_parked(i915);
135
136         wakeref = fetch_and_zero(&i915->gt.awake);
137         GEM_BUG_ON(!wakeref);
138
139         if (INTEL_GEN(i915) >= 6)
140                 gen6_rps_idle(i915);
141
142         intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref);
143
144         i915_globals_park();
145 }
146
147 void i915_gem_park(struct drm_i915_private *i915)
148 {
149         GEM_TRACE("\n");
150
151         lockdep_assert_held(&i915->drm.struct_mutex);
152         GEM_BUG_ON(i915->gt.active_requests);
153
154         if (!i915->gt.awake)
155                 return;
156
157         /* Defer the actual call to __i915_gem_park() to prevent ping-pongs */
158         mod_delayed_work(i915->wq, &i915->gt.idle_work, msecs_to_jiffies(100));
159 }
160
161 void i915_gem_unpark(struct drm_i915_private *i915)
162 {
163         GEM_TRACE("\n");
164
165         lockdep_assert_held(&i915->drm.struct_mutex);
166         GEM_BUG_ON(!i915->gt.active_requests);
167         assert_rpm_wakelock_held(i915);
168
169         if (i915->gt.awake)
170                 return;
171
172         /*
173          * It seems that the DMC likes to transition between the DC states a lot
174          * when there are no connected displays (no active power domains) during
175          * command submission.
176          *
177          * This activity has a negative impact on the performance of the chip, with
178          * huge latencies observed in the interrupt handler and elsewhere.
179          *
180          * Work around it by grabbing a GT IRQ power domain whilst there is any
181          * GT activity, preventing any DC state transitions.
182          */
183         i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
184         GEM_BUG_ON(!i915->gt.awake);
185
186         i915_globals_unpark();
187
188         intel_enable_gt_powersave(i915);
189         i915_update_gfx_val(i915);
190         if (INTEL_GEN(i915) >= 6)
191                 gen6_rps_busy(i915);
192         i915_pmu_gt_unparked(i915);
193
194         intel_engines_unpark(i915);
195
196         i915_queue_hangcheck(i915);
197
198         queue_delayed_work(i915->wq,
199                            &i915->gt.retire_work,
200                            round_jiffies_up_relative(HZ));
201 }
202
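/**
 * Returns the size of the global GTT aperture and how much of it is
 * still available, i.e. the total minus the space consumed by pinned
 * vma and other reservations.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 */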
203 int
204 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
205                             struct drm_file *file)
206 {
207         struct i915_ggtt *ggtt = &to_i915(dev)->ggtt;
208         struct drm_i915_gem_get_aperture *args = data;
209         struct i915_vma *vma;
210         u64 pinned;
211
212         mutex_lock(&ggtt->vm.mutex);
213
214         pinned = ggtt->vm.reserved;
215         list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
216                 if (i915_vma_is_pinned(vma))
217                         pinned += vma->node.size;
218
219         mutex_unlock(&ggtt->vm.mutex);
220
221         args->aper_size = ggtt->vm.total;
222         args->aper_available_size = args->aper_size - pinned;
223
224         return 0;
225 }
226
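/*
 * Back the object with a single contiguous DMA allocation instead of
 * its shmem pages: copy each page into a drm_pci_alloc() buffer sized
 * and aligned to the next power of two of the object size, then point
 * a one-entry sg_table at that buffer.
 */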
227 static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
228 {
229         struct address_space *mapping = obj->base.filp->f_mapping;
230         drm_dma_handle_t *phys;
231         struct sg_table *st;
232         struct scatterlist *sg;
233         char *vaddr;
234         int i;
235         int err;
236
237         if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
238                 return -EINVAL;
239
240         /* Always aligning to the object size allows a single allocation
241          * to handle all possible callers, and given typical object sizes,
242          * the alignment of the buddy allocation will naturally match.
243          */
244         phys = drm_pci_alloc(obj->base.dev,
245                              roundup_pow_of_two(obj->base.size),
246                              roundup_pow_of_two(obj->base.size));
247         if (!phys)
248                 return -ENOMEM;
249
250         vaddr = phys->vaddr;
251         for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
252                 struct page *page;
253                 char *src;
254
255                 page = shmem_read_mapping_page(mapping, i);
256                 if (IS_ERR(page)) {
257                         err = PTR_ERR(page);
258                         goto err_phys;
259                 }
260
261                 src = kmap_atomic(page);
262                 memcpy(vaddr, src, PAGE_SIZE);
263                 drm_clflush_virt_range(vaddr, PAGE_SIZE);
264                 kunmap_atomic(src);
265
266                 put_page(page);
267                 vaddr += PAGE_SIZE;
268         }
269
270         i915_gem_chipset_flush(to_i915(obj->base.dev));
271
272         st = kmalloc(sizeof(*st), GFP_KERNEL);
273         if (!st) {
274                 err = -ENOMEM;
275                 goto err_phys;
276         }
277
278         if (sg_alloc_table(st, 1, GFP_KERNEL)) {
279                 kfree(st);
280                 err = -ENOMEM;
281                 goto err_phys;
282         }
283
284         sg = st->sgl;
285         sg->offset = 0;
286         sg->length = obj->base.size;
287
288         sg_dma_address(sg) = phys->busaddr;
289         sg_dma_len(sg) = obj->base.size;
290
291         obj->phys_handle = phys;
292
293         __i915_gem_object_set_pages(obj, st, sg->length);
294
295         return 0;
296
297 err_phys:
298         drm_pci_free(obj->base.dev, phys);
299
300         return err;
301 }
302
303 static void __start_cpu_write(struct drm_i915_gem_object *obj)
304 {
305         obj->read_domains = I915_GEM_DOMAIN_CPU;
306         obj->write_domain = I915_GEM_DOMAIN_CPU;
307         if (cpu_write_needs_clflush(obj))
308                 obj->cache_dirty = true;
309 }
310
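/*
 * Common bookkeeping before an object's backing pages are released:
 * drop any dirty data if userspace marked the object DONTNEED, clflush
 * the pages if the caller requires it and the object is not already
 * CPU coherent, and finally return the object to the CPU domain.
 */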
311 static void
312 __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
313                                 struct sg_table *pages,
314                                 bool needs_clflush)
315 {
316         GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
317
318         if (obj->mm.madv == I915_MADV_DONTNEED)
319                 obj->mm.dirty = false;
320
321         if (needs_clflush &&
322             (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
323             !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
324                 drm_clflush_sg(pages);
325
326         __start_cpu_write(obj);
327 }
328
329 static void
330 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
331                                struct sg_table *pages)
332 {
333         __i915_gem_object_release_shmem(obj, pages, false);
334
335         if (obj->mm.dirty) {
336                 struct address_space *mapping = obj->base.filp->f_mapping;
337                 char *vaddr = obj->phys_handle->vaddr;
338                 int i;
339
340                 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
341                         struct page *page;
342                         char *dst;
343
344                         page = shmem_read_mapping_page(mapping, i);
345                         if (IS_ERR(page))
346                                 continue;
347
348                         dst = kmap_atomic(page);
349                         drm_clflush_virt_range(vaddr, PAGE_SIZE);
350                         memcpy(dst, vaddr, PAGE_SIZE);
351                         kunmap_atomic(dst);
352
353                         set_page_dirty(page);
354                         if (obj->mm.madv == I915_MADV_WILLNEED)
355                                 mark_page_accessed(page);
356                         put_page(page);
357                         vaddr += PAGE_SIZE;
358                 }
359                 obj->mm.dirty = false;
360         }
361
362         sg_free_table(pages);
363         kfree(pages);
364
365         drm_pci_free(obj->base.dev, obj->phys_handle);
366 }
367
368 static void
369 i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
370 {
371         i915_gem_object_unpin_pages(obj);
372 }
373
374 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
375         .get_pages = i915_gem_object_get_pages_phys,
376         .put_pages = i915_gem_object_put_pages_phys,
377         .release = i915_gem_object_release_phys,
378 };
379
380 static const struct drm_i915_gem_object_ops i915_gem_object_ops;
381
382 int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
383 {
384         struct i915_vma *vma;
385         LIST_HEAD(still_in_list);
386         int ret;
387
388         lockdep_assert_held(&obj->base.dev->struct_mutex);
389
390         /* Closed vma are removed from the obj->vma.list - but they may
391          * still have an active binding on the object. To remove those we
392          * must wait for all rendering to complete to the object (as unbinding
393          * must anyway), and retire the requests.
394          */
395         ret = i915_gem_object_set_to_cpu_domain(obj, false);
396         if (ret)
397                 return ret;
398
399         spin_lock(&obj->vma.lock);
400         while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
401                                                        struct i915_vma,
402                                                        obj_link))) {
403                 list_move_tail(&vma->obj_link, &still_in_list);
404                 spin_unlock(&obj->vma.lock);
405
406                 ret = i915_vma_unbind(vma);
407
408                 spin_lock(&obj->vma.lock);
409         }
410         list_splice(&still_in_list, &obj->vma.list);
411         spin_unlock(&obj->vma.lock);
412
413         return ret;
414 }
415
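/*
 * Wait upon a single fence, returning the remaining timeout. Fences
 * that have already signaled are skipped, foreign fences go through
 * the generic dma_fence_wait_timeout(), and native i915 requests use
 * i915_request_wait() and are retired afterwards if the caller holds
 * struct_mutex (I915_WAIT_LOCKED).
 */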
416 static long
417 i915_gem_object_wait_fence(struct dma_fence *fence,
418                            unsigned int flags,
419                            long timeout)
420 {
421         struct i915_request *rq;
422
423         BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);
424
425         if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
426                 return timeout;
427
428         if (!dma_fence_is_i915(fence))
429                 return dma_fence_wait_timeout(fence,
430                                               flags & I915_WAIT_INTERRUPTIBLE,
431                                               timeout);
432
433         rq = to_request(fence);
434         if (i915_request_completed(rq))
435                 goto out;
436
437         timeout = i915_request_wait(rq, flags, timeout);
438
439 out:
440         if (flags & I915_WAIT_LOCKED && i915_request_completed(rq))
441                 i915_request_retire_upto(rq);
442
443         return timeout;
444 }
445
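/*
 * Wait upon the fences tracked by the reservation object: every shared
 * fence plus the exclusive fence when I915_WAIT_ALL is given, otherwise
 * only the exclusive fence. The remaining timeout is threaded through
 * each wait in turn, and once everything has signaled we opportunistically
 * prune the fence array to release the stale references.
 */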
446 static long
447 i915_gem_object_wait_reservation(struct reservation_object *resv,
448                                  unsigned int flags,
449                                  long timeout)
450 {
451         unsigned int seq = __read_seqcount_begin(&resv->seq);
452         struct dma_fence *excl;
453         bool prune_fences = false;
454
455         if (flags & I915_WAIT_ALL) {
456                 struct dma_fence **shared;
457                 unsigned int count, i;
458                 int ret;
459
460                 ret = reservation_object_get_fences_rcu(resv,
461                                                         &excl, &count, &shared);
462                 if (ret)
463                         return ret;
464
465                 for (i = 0; i < count; i++) {
466                         timeout = i915_gem_object_wait_fence(shared[i],
467                                                              flags, timeout);
468                         if (timeout < 0)
469                                 break;
470
471                         dma_fence_put(shared[i]);
472                 }
473
474                 for (; i < count; i++)
475                         dma_fence_put(shared[i]);
476                 kfree(shared);
477
478                 /*
479                  * If both shared fences and an exclusive fence exist,
480                  * then by construction the shared fences must be later
481                  * than the exclusive fence. If we successfully wait for
482                  * all the shared fences, we know that the exclusive fence
483                  * must all be signaled. If all the shared fences are
484                  * signaled, we can prune the array and recover the
485                  * floating references on the fences/requests.
486                  */
487                 prune_fences = count && timeout >= 0;
488         } else {
489                 excl = reservation_object_get_excl_rcu(resv);
490         }
491
492         if (excl && timeout >= 0)
493                 timeout = i915_gem_object_wait_fence(excl, flags, timeout);
494
495         dma_fence_put(excl);
496
497         /*
498          * Opportunistically prune the fences iff we know they have *all* been
499          * signaled and that the reservation object has not been changed (i.e.
500          * no new fences have been added).
501          */
502         if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) {
503                 if (reservation_object_trylock(resv)) {
504                         if (!__read_seqcount_retry(&resv->seq, seq))
505                                 reservation_object_add_excl_fence(resv, NULL);
506                         reservation_object_unlock(resv);
507                 }
508         }
509
510         return timeout;
511 }
512
513 static void __fence_set_priority(struct dma_fence *fence,
514                                  const struct i915_sched_attr *attr)
515 {
516         struct i915_request *rq;
517         struct intel_engine_cs *engine;
518
519         if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
520                 return;
521
522         rq = to_request(fence);
523         engine = rq->engine;
524
525         local_bh_disable();
526         rcu_read_lock(); /* RCU serialisation for set-wedged protection */
527         if (engine->schedule)
528                 engine->schedule(rq, attr);
529         rcu_read_unlock();
530         local_bh_enable(); /* kick the tasklets if queues were reprioritised */
531 }
532
533 static void fence_set_priority(struct dma_fence *fence,
534                                const struct i915_sched_attr *attr)
535 {
536         /* Recurse once into a fence-array */
537         if (dma_fence_is_array(fence)) {
538                 struct dma_fence_array *array = to_dma_fence_array(fence);
539                 int i;
540
541                 for (i = 0; i < array->num_fences; i++)
542                         __fence_set_priority(array->fences[i], attr);
543         } else {
544                 __fence_set_priority(fence, attr);
545         }
546 }
547
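/*
 * Apply the given scheduling attributes to the requests currently
 * tracked by the object, so that whatever we are about to wait upon is
 * bumped along the queues: all shared fences when I915_WAIT_ALL is
 * set, otherwise just the exclusive fence.
 */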
548 int
549 i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
550                               unsigned int flags,
551                               const struct i915_sched_attr *attr)
552 {
553         struct dma_fence *excl;
554
555         if (flags & I915_WAIT_ALL) {
556                 struct dma_fence **shared;
557                 unsigned int count, i;
558                 int ret;
559
560                 ret = reservation_object_get_fences_rcu(obj->resv,
561                                                         &excl, &count, &shared);
562                 if (ret)
563                         return ret;
564
565                 for (i = 0; i < count; i++) {
566                         fence_set_priority(shared[i], attr);
567                         dma_fence_put(shared[i]);
568                 }
569
570                 kfree(shared);
571         } else {
572                 excl = reservation_object_get_excl_rcu(obj->resv);
573         }
574
575         if (excl) {
576                 fence_set_priority(excl, attr);
577                 dma_fence_put(excl);
578         }
579         return 0;
580 }
581
582 /**
583  * Waits for rendering to the object to be completed
584  * @obj: i915 gem object
585  * @flags: how to wait (under a lock, for all rendering or just for writes etc)
586  * @timeout: how long to wait
587  */
588 int
589 i915_gem_object_wait(struct drm_i915_gem_object *obj,
590                      unsigned int flags,
591                      long timeout)
592 {
593         might_sleep();
594         GEM_BUG_ON(timeout < 0);
595
596         timeout = i915_gem_object_wait_reservation(obj->resv, flags, timeout);
597         return timeout < 0 ? timeout : 0;
598 }
599
600 static int
601 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
602                      struct drm_i915_gem_pwrite *args,
603                      struct drm_file *file)
604 {
605         void *vaddr = obj->phys_handle->vaddr + args->offset;
606         char __user *user_data = u64_to_user_ptr(args->data_ptr);
607
608         /* We manually control the domain here and pretend that it
609          * remains coherent i.e. in the GTT domain, like shmem_pwrite.
610          */
611         intel_fb_obj_invalidate(obj, ORIGIN_CPU);
612         if (copy_from_user(vaddr, user_data, args->size))
613                 return -EFAULT;
614
615         drm_clflush_virt_range(vaddr, args->size);
616         i915_gem_chipset_flush(to_i915(obj->base.dev));
617
618         intel_fb_obj_flush(obj, ORIGIN_CPU);
619         return 0;
620 }
621
622 static int
623 i915_gem_create(struct drm_file *file,
624                 struct drm_i915_private *dev_priv,
625                 u64 size,
626                 u32 *handle_p)
627 {
628         struct drm_i915_gem_object *obj;
629         int ret;
630         u32 handle;
631
632         size = roundup(size, PAGE_SIZE);
633         if (size == 0)
634                 return -EINVAL;
635
636         /* Allocate the new object */
637         obj = i915_gem_object_create(dev_priv, size);
638         if (IS_ERR(obj))
639                 return PTR_ERR(obj);
640
641         ret = drm_gem_handle_create(file, &obj->base, &handle);
642         /* drop reference from allocate - handle holds it now */
643         i915_gem_object_put(obj);
644         if (ret)
645                 return ret;
646
647         *handle_p = handle;
648         return 0;
649 }
650
651 int
652 i915_gem_dumb_create(struct drm_file *file,
653                      struct drm_device *dev,
654                      struct drm_mode_create_dumb *args)
655 {
656         /* have to work out size/pitch and return them */
657         args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
658         args->size = args->pitch * args->height;
659         return i915_gem_create(file, to_i915(dev),
660                                args->size, &args->handle);
661 }
662
663 static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
664 {
665         return !(obj->cache_level == I915_CACHE_NONE ||
666                  obj->cache_level == I915_CACHE_WT);
667 }
668
669 /**
670  * Creates a new mm object and returns a handle to it.
671  * @dev: drm device pointer
672  * @data: ioctl data blob
673  * @file: drm file pointer
674  */
675 int
676 i915_gem_create_ioctl(struct drm_device *dev, void *data,
677                       struct drm_file *file)
678 {
679         struct drm_i915_private *dev_priv = to_i915(dev);
680         struct drm_i915_gem_create *args = data;
681
682         i915_gem_flush_free_objects(dev_priv);
683
684         return i915_gem_create(file, dev_priv,
685                                args->size, &args->handle);
686 }
687
688 static inline enum fb_op_origin
689 fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
690 {
691         return (domain == I915_GEM_DOMAIN_GTT ?
692                 obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
693 }
694
695 void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
696 {
697         intel_wakeref_t wakeref;
698
699         /*
700          * No actual flushing is required for the GTT write domain for reads
701          * from the GTT domain. Writes to it "immediately" go to main memory
702          * as far as we know, so there's no chipset flush. It also doesn't
703          * land in the GPU render cache.
704          *
705          * However, we do have to enforce the order so that all writes through
706          * the GTT land before any writes to the device, such as updates to
707          * the GATT itself.
708          *
709          * We also have to wait a bit for the writes to land from the GTT.
710          * An uncached read (i.e. mmio) seems to be ideal for the round-trip
711          * timing. This issue has only been observed when switching quickly
712          * between GTT writes and CPU reads from inside the kernel on recent hw,
713          * and it appears to only affect discrete GTT blocks (i.e. on LLC
714          * system agents we could not reproduce this behaviour until
715          * Cannonlake, that is!).
716          */
717
718         wmb();
719
720         if (INTEL_INFO(dev_priv)->has_coherent_ggtt)
721                 return;
722
723         i915_gem_chipset_flush(dev_priv);
724
725         with_intel_runtime_pm(dev_priv, wakeref) {
726                 spin_lock_irq(&dev_priv->uncore.lock);
727
728                 POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));
729
730                 spin_unlock_irq(&dev_priv->uncore.lock);
731         }
732 }
733
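/*
 * Make any pending writes visible before the object leaves its current
 * write domain: flush GGTT writes and drop the per-vma GGTT write
 * tracking, issue a wmb() for WC writes, clflush for CPU writes, or
 * simply mark the cache dirty for GPU render writes that will need a
 * clflush later.
 */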
734 static void
735 flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
736 {
737         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
738         struct i915_vma *vma;
739
740         if (!(obj->write_domain & flush_domains))
741                 return;
742
743         switch (obj->write_domain) {
744         case I915_GEM_DOMAIN_GTT:
745                 i915_gem_flush_ggtt_writes(dev_priv);
746
747                 intel_fb_obj_flush(obj,
748                                    fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
749
750                 for_each_ggtt_vma(vma, obj) {
751                         if (vma->iomap)
752                                 continue;
753
754                         i915_vma_unset_ggtt_write(vma);
755                 }
756                 break;
757
758         case I915_GEM_DOMAIN_WC:
759                 wmb();
760                 break;
761
762         case I915_GEM_DOMAIN_CPU:
763                 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
764                 break;
765
766         case I915_GEM_DOMAIN_RENDER:
767                 if (gpu_write_needs_clflush(obj))
768                         obj->cache_dirty = true;
769                 break;
770         }
771
772         obj->write_domain = 0;
773 }
774
775 /*
776  * Pins the specified object's pages and synchronizes the object with
777  * GPU accesses. Sets needs_clflush to non-zero if the caller should
778  * flush the object from the CPU cache.
779  */
780 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
781                                     unsigned int *needs_clflush)
782 {
783         int ret;
784
785         lockdep_assert_held(&obj->base.dev->struct_mutex);
786
787         *needs_clflush = 0;
788         if (!i915_gem_object_has_struct_page(obj))
789                 return -ENODEV;
790
791         ret = i915_gem_object_wait(obj,
792                                    I915_WAIT_INTERRUPTIBLE |
793                                    I915_WAIT_LOCKED,
794                                    MAX_SCHEDULE_TIMEOUT);
795         if (ret)
796                 return ret;
797
798         ret = i915_gem_object_pin_pages(obj);
799         if (ret)
800                 return ret;
801
802         if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
803             !static_cpu_has(X86_FEATURE_CLFLUSH)) {
804                 ret = i915_gem_object_set_to_cpu_domain(obj, false);
805                 if (ret)
806                         goto err_unpin;
807                 else
808                         goto out;
809         }
810
811         flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
812
813         /* If we're not in the cpu read domain, set ourselves into the gtt
814          * read domain and manually flush cachelines (if required). This
815          * optimizes for the case when the gpu will dirty the data
816          * anyway again before the next pread happens.
817          */
818         if (!obj->cache_dirty &&
819             !(obj->read_domains & I915_GEM_DOMAIN_CPU))
820                 *needs_clflush = CLFLUSH_BEFORE;
821
822 out:
823         /* return with the pages pinned */
824         return 0;
825
826 err_unpin:
827         i915_gem_object_unpin_pages(obj);
828         return ret;
829 }
830
831 int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
832                                      unsigned int *needs_clflush)
833 {
834         int ret;
835
836         lockdep_assert_held(&obj->base.dev->struct_mutex);
837
838         *needs_clflush = 0;
839         if (!i915_gem_object_has_struct_page(obj))
840                 return -ENODEV;
841
842         ret = i915_gem_object_wait(obj,
843                                    I915_WAIT_INTERRUPTIBLE |
844                                    I915_WAIT_LOCKED |
845                                    I915_WAIT_ALL,
846                                    MAX_SCHEDULE_TIMEOUT);
847         if (ret)
848                 return ret;
849
850         ret = i915_gem_object_pin_pages(obj);
851         if (ret)
852                 return ret;
853
854         if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
855             !static_cpu_has(X86_FEATURE_CLFLUSH)) {
856                 ret = i915_gem_object_set_to_cpu_domain(obj, true);
857                 if (ret)
858                         goto err_unpin;
859                 else
860                         goto out;
861         }
862
863         flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
864
865         /* If we're not in the cpu write domain, set ourselves into the
866          * gtt write domain and manually flush cachelines (as required).
867          * This optimizes for the case when the gpu will use the data
868          * right away and we therefore have to clflush anyway.
869          */
870         if (!obj->cache_dirty) {
871                 *needs_clflush |= CLFLUSH_AFTER;
872
873                 /*
874                  * Same trick applies to invalidate partially written
875                  * cachelines read before writing.
876                  */
877                 if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
878                         *needs_clflush |= CLFLUSH_BEFORE;
879         }
880
881 out:
882         intel_fb_obj_invalidate(obj, ORIGIN_CPU);
883         obj->mm.dirty = true;
884         /* return with the pages pinned */
885         return 0;
886
887 err_unpin:
888         i915_gem_object_unpin_pages(obj);
889         return ret;
890 }
891
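/* Per-page copy function for the shmem pread path.
 * Flushes invalid cachelines before copying out of the page if
 * needs_clflush is set.
 */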
892 static int
893 shmem_pread(struct page *page, int offset, int len, char __user *user_data,
894             bool needs_clflush)
895 {
896         char *vaddr;
897         int ret;
898
899         vaddr = kmap(page);
900
901         if (needs_clflush)
902                 drm_clflush_virt_range(vaddr + offset, len);
903
904         ret = __copy_to_user(user_data, vaddr + offset, len);
905
906         kunmap(page);
907
908         return ret ? -EFAULT : 0;
909 }
910
911 static int
912 i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
913                      struct drm_i915_gem_pread *args)
914 {
915         char __user *user_data;
916         u64 remain;
917         unsigned int needs_clflush;
918         unsigned int idx, offset;
919         int ret;
920
921         ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
922         if (ret)
923                 return ret;
924
925         ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
926         mutex_unlock(&obj->base.dev->struct_mutex);
927         if (ret)
928                 return ret;
929
930         remain = args->size;
931         user_data = u64_to_user_ptr(args->data_ptr);
932         offset = offset_in_page(args->offset);
933         for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
934                 struct page *page = i915_gem_object_get_page(obj, idx);
935                 unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);
936
937                 ret = shmem_pread(page, offset, length, user_data,
938                                   needs_clflush);
939                 if (ret)
940                         break;
941
942                 remain -= length;
943                 user_data += length;
944                 offset = 0;
945         }
946
947         i915_gem_obj_finish_shmem_access(obj);
948         return ret;
949 }
950
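/*
 * Copy from the GGTT aperture out to userspace: first try an atomic
 * write-combined mapping with a non-faulting copy, and if that cannot
 * complete, fall back to a plain mapping and copy_to_user(). Returns
 * true if any bytes were left uncopied.
 */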
951 static inline bool
952 gtt_user_read(struct io_mapping *mapping,
953               loff_t base, int offset,
954               char __user *user_data, int length)
955 {
956         void __iomem *vaddr;
957         unsigned long unwritten;
958
959         /* We can use the cpu mem copy function because this is X86. */
960         vaddr = io_mapping_map_atomic_wc(mapping, base);
961         unwritten = __copy_to_user_inatomic(user_data,
962                                             (void __force *)vaddr + offset,
963                                             length);
964         io_mapping_unmap_atomic(vaddr);
965         if (unwritten) {
966                 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
967                 unwritten = copy_to_user(user_data,
968                                          (void __force *)vaddr + offset,
969                                          length);
970                 io_mapping_unmap(vaddr);
971         }
972         return unwritten;
973 }
974
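/*
 * Fallback pread path through the GGTT aperture: pin the object into
 * the mappable region if possible, otherwise borrow a single scratch
 * PTE and repoint it at each page of the object in turn, copying the
 * data out with uncached reads via gtt_user_read().
 */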
975 static int
976 i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
977                    const struct drm_i915_gem_pread *args)
978 {
979         struct drm_i915_private *i915 = to_i915(obj->base.dev);
980         struct i915_ggtt *ggtt = &i915->ggtt;
981         intel_wakeref_t wakeref;
982         struct drm_mm_node node;
983         struct i915_vma *vma;
984         void __user *user_data;
985         u64 remain, offset;
986         int ret;
987
988         ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
989         if (ret)
990                 return ret;
991
992         wakeref = intel_runtime_pm_get(i915);
993         vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
994                                        PIN_MAPPABLE |
995                                        PIN_NONFAULT |
996                                        PIN_NONBLOCK);
997         if (!IS_ERR(vma)) {
998                 node.start = i915_ggtt_offset(vma);
999                 node.allocated = false;
1000                 ret = i915_vma_put_fence(vma);
1001                 if (ret) {
1002                         i915_vma_unpin(vma);
1003                         vma = ERR_PTR(ret);
1004                 }
1005         }
1006         if (IS_ERR(vma)) {
1007                 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
1008                 if (ret)
1009                         goto out_unlock;
1010                 GEM_BUG_ON(!node.allocated);
1011         }
1012
1013         ret = i915_gem_object_set_to_gtt_domain(obj, false);
1014         if (ret)
1015                 goto out_unpin;
1016
1017         mutex_unlock(&i915->drm.struct_mutex);
1018
1019         user_data = u64_to_user_ptr(args->data_ptr);
1020         remain = args->size;
1021         offset = args->offset;
1022
1023         while (remain > 0) {
1024                 /* Operation in this page
1025                  *
1026                  * page_base = page offset within aperture
1027                  * page_offset = offset within page
1028                  * page_length = bytes to copy for this page
1029                  */
1030                 u32 page_base = node.start;
1031                 unsigned page_offset = offset_in_page(offset);
1032                 unsigned page_length = PAGE_SIZE - page_offset;
1033                 page_length = remain < page_length ? remain : page_length;
1034                 if (node.allocated) {
1035                         wmb();
1036                         ggtt->vm.insert_page(&ggtt->vm,
1037                                              i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
1038                                              node.start, I915_CACHE_NONE, 0);
1039                         wmb();
1040                 } else {
1041                         page_base += offset & PAGE_MASK;
1042                 }
1043
1044                 if (gtt_user_read(&ggtt->iomap, page_base, page_offset,
1045                                   user_data, page_length)) {
1046                         ret = -EFAULT;
1047                         break;
1048                 }
1049
1050                 remain -= page_length;
1051                 user_data += page_length;
1052                 offset += page_length;
1053         }
1054
1055         mutex_lock(&i915->drm.struct_mutex);
1056 out_unpin:
1057         if (node.allocated) {
1058                 wmb();
1059                 ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
1060                 remove_mappable_node(&node);
1061         } else {
1062                 i915_vma_unpin(vma);
1063         }
1064 out_unlock:
1065         intel_runtime_pm_put(i915, wakeref);
1066         mutex_unlock(&i915->drm.struct_mutex);
1067
1068         return ret;
1069 }
1070
1071 /**
1072  * Reads data from the object referenced by handle.
1073  * @dev: drm device pointer
1074  * @data: ioctl data blob
1075  * @file: drm file pointer
1076  *
1077  * On error, the contents of *data are undefined.
1078  */
1079 int
1080 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
1081                      struct drm_file *file)
1082 {
1083         struct drm_i915_gem_pread *args = data;
1084         struct drm_i915_gem_object *obj;
1085         int ret;
1086
1087         if (args->size == 0)
1088                 return 0;
1089
1090         if (!access_ok(u64_to_user_ptr(args->data_ptr),
1091                        args->size))
1092                 return -EFAULT;
1093
1094         obj = i915_gem_object_lookup(file, args->handle);
1095         if (!obj)
1096                 return -ENOENT;
1097
1098         /* Bounds check source.  */
1099         if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
1100                 ret = -EINVAL;
1101                 goto out;
1102         }
1103
1104         trace_i915_gem_object_pread(obj, args->offset, args->size);
1105
1106         ret = i915_gem_object_wait(obj,
1107                                    I915_WAIT_INTERRUPTIBLE,
1108                                    MAX_SCHEDULE_TIMEOUT);
1109         if (ret)
1110                 goto out;
1111
1112         ret = i915_gem_object_pin_pages(obj);
1113         if (ret)
1114                 goto out;
1115
1116         ret = i915_gem_shmem_pread(obj, args);
1117         if (ret == -EFAULT || ret == -ENODEV)
1118                 ret = i915_gem_gtt_pread(obj, args);
1119
1120         i915_gem_object_unpin_pages(obj);
1121 out:
1122         i915_gem_object_put(obj);
1123         return ret;
1124 }
1125
1126 /* This is the fast write path which cannot handle
1127  * page faults in the source data
1128  */
1129
1130 static inline bool
1131 ggtt_write(struct io_mapping *mapping,
1132            loff_t base, int offset,
1133            char __user *user_data, int length)
1134 {
1135         void __iomem *vaddr;
1136         unsigned long unwritten;
1137
1138         /* We can use the cpu mem copy function because this is X86. */
1139         vaddr = io_mapping_map_atomic_wc(mapping, base);
1140         unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset,
1141                                                       user_data, length);
1142         io_mapping_unmap_atomic(vaddr);
1143         if (unwritten) {
1144                 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
1145                 unwritten = copy_from_user((void __force *)vaddr + offset,
1146                                            user_data, length);
1147                 io_mapping_unmap(vaddr);
1148         }
1149
1150         return unwritten;
1151 }
1152
1153 /**
1154  * This is the fast pwrite path, where we copy the data directly from the
1155  * user into the GTT, uncached.
1156  * @obj: i915 GEM object
1157  * @args: pwrite arguments structure
1158  */
1159 static int
1160 i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
1161                          const struct drm_i915_gem_pwrite *args)
1162 {
1163         struct drm_i915_private *i915 = to_i915(obj->base.dev);
1164         struct i915_ggtt *ggtt = &i915->ggtt;
1165         intel_wakeref_t wakeref;
1166         struct drm_mm_node node;
1167         struct i915_vma *vma;
1168         u64 remain, offset;
1169         void __user *user_data;
1170         int ret;
1171
1172         ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
1173         if (ret)
1174                 return ret;
1175
1176         if (i915_gem_object_has_struct_page(obj)) {
1177                 /*
1178                  * Avoid waking the device up if we can fallback, as
1179                  * waking/resuming is very slow (worst-case 10-100 ms
1180                  * depending on PCI sleeps and our own resume time).
1181                  * This easily dwarfs any performance advantage from
1182                  * using the cache bypass of indirect GGTT access.
1183                  */
1184                 wakeref = intel_runtime_pm_get_if_in_use(i915);
1185                 if (!wakeref) {
1186                         ret = -EFAULT;
1187                         goto out_unlock;
1188                 }
1189         } else {
1190                 /* No backing pages, no fallback, we must force GGTT access */
1191                 wakeref = intel_runtime_pm_get(i915);
1192         }
1193
1194         vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1195                                        PIN_MAPPABLE |
1196                                        PIN_NONFAULT |
1197                                        PIN_NONBLOCK);
1198         if (!IS_ERR(vma)) {
1199                 node.start = i915_ggtt_offset(vma);
1200                 node.allocated = false;
1201                 ret = i915_vma_put_fence(vma);
1202                 if (ret) {
1203                         i915_vma_unpin(vma);
1204                         vma = ERR_PTR(ret);
1205                 }
1206         }
1207         if (IS_ERR(vma)) {
1208                 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
1209                 if (ret)
1210                         goto out_rpm;
1211                 GEM_BUG_ON(!node.allocated);
1212         }
1213
1214         ret = i915_gem_object_set_to_gtt_domain(obj, true);
1215         if (ret)
1216                 goto out_unpin;
1217
1218         mutex_unlock(&i915->drm.struct_mutex);
1219
1220         intel_fb_obj_invalidate(obj, ORIGIN_CPU);
1221
1222         user_data = u64_to_user_ptr(args->data_ptr);
1223         offset = args->offset;
1224         remain = args->size;
1225         while (remain) {
1226                 /* Operation in this page
1227                  *
1228                  * page_base = page offset within aperture
1229                  * page_offset = offset within page
1230                  * page_length = bytes to copy for this page
1231                  */
1232                 u32 page_base = node.start;
1233                 unsigned int page_offset = offset_in_page(offset);
1234                 unsigned int page_length = PAGE_SIZE - page_offset;
1235                 page_length = remain < page_length ? remain : page_length;
1236                 if (node.allocated) {
1237                         wmb(); /* flush the write before we modify the GGTT */
1238                         ggtt->vm.insert_page(&ggtt->vm,
1239                                              i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
1240                                              node.start, I915_CACHE_NONE, 0);
1241                         wmb(); /* flush modifications to the GGTT (insert_page) */
1242                 } else {
1243                         page_base += offset & PAGE_MASK;
1244                 }
1245                 /* If we get a fault while copying data, then (presumably) our
1246                  * source page isn't available.  Return the error and we'll
1247                  * retry in the slow path.
1248                  * If the object is non-shmem backed, we retry again with the
1249                  * path that handles page fault.
1250                  */
1251                 if (ggtt_write(&ggtt->iomap, page_base, page_offset,
1252                                user_data, page_length)) {
1253                         ret = -EFAULT;
1254                         break;
1255                 }
1256
1257                 remain -= page_length;
1258                 user_data += page_length;
1259                 offset += page_length;
1260         }
1261         intel_fb_obj_flush(obj, ORIGIN_CPU);
1262
1263         mutex_lock(&i915->drm.struct_mutex);
1264 out_unpin:
1265         if (node.allocated) {
1266                 wmb();
1267                 ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
1268                 remove_mappable_node(&node);
1269         } else {
1270                 i915_vma_unpin(vma);
1271         }
1272 out_rpm:
1273         intel_runtime_pm_put(i915, wakeref);
1274 out_unlock:
1275         mutex_unlock(&i915->drm.struct_mutex);
1276         return ret;
1277 }
1278
1279 /* Per-page copy function for the shmem pwrite fastpath.
1280  * Flushes invalid cachelines before writing to the target if
1281  * needs_clflush_before is set and flushes out any written cachelines after
1282  * writing if needs_clflush is set.
1283  */
1284 static int
1285 shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
1286              bool needs_clflush_before,
1287              bool needs_clflush_after)
1288 {
1289         char *vaddr;
1290         int ret;
1291
1292         vaddr = kmap(page);
1293
1294         if (needs_clflush_before)
1295                 drm_clflush_virt_range(vaddr + offset, len);
1296
1297         ret = __copy_from_user(vaddr + offset, user_data, len);
1298         if (!ret && needs_clflush_after)
1299                 drm_clflush_virt_range(vaddr + offset, len);
1300
1301         kunmap(page);
1302
1303         return ret ? -EFAULT : 0;
1304 }
1305
1306 static int
1307 i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
1308                       const struct drm_i915_gem_pwrite *args)
1309 {
1310         struct drm_i915_private *i915 = to_i915(obj->base.dev);
1311         void __user *user_data;
1312         u64 remain;
1313         unsigned int partial_cacheline_write;
1314         unsigned int needs_clflush;
1315         unsigned int offset, idx;
1316         int ret;
1317
1318         ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
1319         if (ret)
1320                 return ret;
1321
1322         ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
1323         mutex_unlock(&i915->drm.struct_mutex);
1324         if (ret)
1325                 return ret;
1326
1327         /* If we don't overwrite a cacheline completely we need to be
1328          * careful to have up-to-date data by first clflushing. Don't
1329          * overcomplicate things and flush the entire write.
1330          */
1331         partial_cacheline_write = 0;
1332         if (needs_clflush & CLFLUSH_BEFORE)
1333                 partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;
1334
1335         user_data = u64_to_user_ptr(args->data_ptr);
1336         remain = args->size;
1337         offset = offset_in_page(args->offset);
1338         for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
1339                 struct page *page = i915_gem_object_get_page(obj, idx);
1340                 unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);
1341
1342                 ret = shmem_pwrite(page, offset, length, user_data,
1343                                    (offset | length) & partial_cacheline_write,
1344                                    needs_clflush & CLFLUSH_AFTER);
1345                 if (ret)
1346                         break;
1347
1348                 remain -= length;
1349                 user_data += length;
1350                 offset = 0;
1351         }
1352
1353         intel_fb_obj_flush(obj, ORIGIN_CPU);
1354         i915_gem_obj_finish_shmem_access(obj);
1355         return ret;
1356 }
1357
1358 /**
1359  * Writes data to the object referenced by handle.
1360  * @dev: drm device
1361  * @data: ioctl data blob
1362  * @file: drm file
1363  *
1364  * On error, the contents of the buffer that were to be modified are undefined.
1365  */
1366 int
1367 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
1368                       struct drm_file *file)
1369 {
1370         struct drm_i915_gem_pwrite *args = data;
1371         struct drm_i915_gem_object *obj;
1372         int ret;
1373
1374         if (args->size == 0)
1375                 return 0;
1376
1377         if (!access_ok(u64_to_user_ptr(args->data_ptr), args->size))
1378                 return -EFAULT;
1379
1380         obj = i915_gem_object_lookup(file, args->handle);
1381         if (!obj)
1382                 return -ENOENT;
1383
1384         /* Bounds check destination. */
1385         if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
1386                 ret = -EINVAL;
1387                 goto err;
1388         }
1389
1390         /* Writes not allowed into this read-only object */
1391         if (i915_gem_object_is_readonly(obj)) {
1392                 ret = -EINVAL;
1393                 goto err;
1394         }
1395
1396         trace_i915_gem_object_pwrite(obj, args->offset, args->size);
1397
1398         ret = -ENODEV;
1399         if (obj->ops->pwrite)
1400                 ret = obj->ops->pwrite(obj, args);
1401         if (ret != -ENODEV)
1402                 goto err;
1403
1404         ret = i915_gem_object_wait(obj,
1405                                    I915_WAIT_INTERRUPTIBLE |
1406                                    I915_WAIT_ALL,
1407                                    MAX_SCHEDULE_TIMEOUT);
1408         if (ret)
1409                 goto err;
1410
1411         ret = i915_gem_object_pin_pages(obj);
1412         if (ret)
1413                 goto err;
1414
1415         ret = -EFAULT;
1416         /* We can only do the GTT pwrite on untiled buffers, as otherwise
1417          * it would end up going through the fenced access, and we'll get
1418          * different detiling behavior between reading and writing.
1419          * pread/pwrite currently are reading and writing from the CPU
1420          * perspective, requiring manual detiling by the client.
1421          */
1422         if (!i915_gem_object_has_struct_page(obj) ||
1423             cpu_write_needs_clflush(obj))
1424                 /* Note that the gtt paths might fail with non-page-backed user
1425                  * pointers (e.g. gtt mappings when moving data between
1426          * textures). Fall back to the shmem path in that case.
1427                  */
1428                 ret = i915_gem_gtt_pwrite_fast(obj, args);
1429
1430         if (ret == -EFAULT || ret == -ENOSPC) {
1431                 if (obj->phys_handle)
1432                         ret = i915_gem_phys_pwrite(obj, args, file);
1433                 else
1434                         ret = i915_gem_shmem_pwrite(obj, args);
1435         }
1436
1437         i915_gem_object_unpin_pages(obj);
1438 err:
1439         i915_gem_object_put(obj);
1440         return ret;
1441 }
1442
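/*
 * Mark the object as recently used for eviction purposes: move its
 * bound GGTT vma to the tail of the vm's bound list, and the object
 * itself to the tail of the bound/unbound object list.
 */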
1443 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
1444 {
1445         struct drm_i915_private *i915 = to_i915(obj->base.dev);
1446         struct list_head *list;
1447         struct i915_vma *vma;
1448
1449         GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
1450
1451         mutex_lock(&i915->ggtt.vm.mutex);
1452         for_each_ggtt_vma(vma, obj) {
1453                 if (!drm_mm_node_allocated(&vma->node))
1454                         continue;
1455
1456                 list_move_tail(&vma->vm_link, &vma->vm->bound_list);
1457         }
1458         mutex_unlock(&i915->ggtt.vm.mutex);
1459
1460         spin_lock(&i915->mm.obj_lock);
1461         list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
1462         list_move_tail(&obj->mm.link, list);
1463         spin_unlock(&i915->mm.obj_lock);
1464 }
1465
1466 /**
1467  * Called when user space prepares to use an object with the CPU, either
1468  * through the mmap ioctl's mapping or a GTT mapping.
1469  * @dev: drm device
1470  * @data: ioctl data blob
1471  * @file: drm file
1472  */
1473 int
1474 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1475                           struct drm_file *file)
1476 {
1477         struct drm_i915_gem_set_domain *args = data;
1478         struct drm_i915_gem_object *obj;
1479         u32 read_domains = args->read_domains;
1480         u32 write_domain = args->write_domain;
1481         int err;
1482
1483         /* Only handle setting domains to types used by the CPU. */
1484         if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
1485                 return -EINVAL;
1486
1487         /*
1488          * Having something in the write domain implies it's in the read
1489          * domain, and only that read domain.  Enforce that in the request.
1490          */
1491         if (write_domain && read_domains != write_domain)
1492                 return -EINVAL;
1493
1494         if (!read_domains)
1495                 return 0;
1496
1497         obj = i915_gem_object_lookup(file, args->handle);
1498         if (!obj)
1499                 return -ENOENT;
1500
1501         /*
1502          * Already in the desired write domain? Nothing for us to do!
1503          *
1504          * We apply a little bit of cunning here to catch a broader set of
1505          * no-ops. If obj->write_domain is set, we must be in the same
1506          * obj->read_domains, and only that domain. Therefore, if that
1507          * obj->write_domain matches the request read_domains, we are
1508          * already in the same read/write domain and can skip the operation,
1509          * without having to further check the requested write_domain.
1510          */
1511         if (READ_ONCE(obj->write_domain) == read_domains) {
1512                 err = 0;
1513                 goto out;
1514         }
1515
1516         /*
1517          * Try to flush the object off the GPU without holding the lock.
1518          * We will repeat the flush holding the lock in the normal manner
1519          * to catch cases where we are gazumped.
1520          */
1521         err = i915_gem_object_wait(obj,
1522                                    I915_WAIT_INTERRUPTIBLE |
1523                                    I915_WAIT_PRIORITY |
1524                                    (write_domain ? I915_WAIT_ALL : 0),
1525                                    MAX_SCHEDULE_TIMEOUT);
1526         if (err)
1527                 goto out;
1528
1529         /*
1530          * Proxy objects do not control access to the backing storage, ergo
1531          * they cannot be used as a means to manipulate the cache domain
1532          * tracking for that backing storage. The proxy object is always
1533          * considered to be outside of any cache domain.
1534          */
1535         if (i915_gem_object_is_proxy(obj)) {
1536                 err = -ENXIO;
1537                 goto out;
1538         }
1539
1540         /*
1541          * Flush and acquire obj->pages so that we are coherent through
1542          * direct access in memory with previous cached writes through
1543          * shmemfs and that our cache domain tracking remains valid.
1544          * For example, if the obj->filp was moved to swap without us
1545          * being notified and releasing the pages, we would mistakenly
1546          * continue to assume that the obj remained out of the CPU cached
1547          * domain.
1548          */
1549         err = i915_gem_object_pin_pages(obj);
1550         if (err)
1551                 goto out;
1552
1553         err = i915_mutex_lock_interruptible(dev);
1554         if (err)
1555                 goto out_unpin;
1556
1557         if (read_domains & I915_GEM_DOMAIN_WC)
1558                 err = i915_gem_object_set_to_wc_domain(obj, write_domain);
1559         else if (read_domains & I915_GEM_DOMAIN_GTT)
1560                 err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
1561         else
1562                 err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
1563
1564         /* And bump the LRU for this access */
1565         i915_gem_object_bump_inactive_ggtt(obj);
1566
1567         mutex_unlock(&dev->struct_mutex);
1568
1569         if (write_domain != 0)
1570                 intel_fb_obj_invalidate(obj,
1571                                         fb_write_origin(obj, write_domain));
1572
1573 out_unpin:
1574         i915_gem_object_unpin_pages(obj);
1575 out:
1576         i915_gem_object_put(obj);
1577         return err;
1578 }
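
/*
 * As an illustration, a minimal sketch of how userspace might drive this
 * ioctl through libdrm before touching a buffer with the CPU. `fd` is assumed
 * to be an open DRM fd and `handle` an existing GEM handle.
 *
 *	struct drm_i915_gem_set_domain arg = {
 *		.handle = handle,
 *		.read_domains = I915_GEM_DOMAIN_CPU,
 *		.write_domain = I915_GEM_DOMAIN_CPU,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg);
 */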
1579
1580 /**
1581  * Called when user space has done writes to this buffer
1582  * @dev: drm device
1583  * @data: ioctl data blob
1584  * @file: drm file
1585  */
1586 int
1587 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1588                          struct drm_file *file)
1589 {
1590         struct drm_i915_gem_sw_finish *args = data;
1591         struct drm_i915_gem_object *obj;
1592
1593         obj = i915_gem_object_lookup(file, args->handle);
1594         if (!obj)
1595                 return -ENOENT;
1596
1597         /*
1598          * Proxy objects are barred from CPU access, so there is no
1599          * need to ban sw_finish as it is a nop.
1600          */
1601
1602         /* Pinned buffers may be scanout, so flush the cache */
1603         i915_gem_object_flush_if_display(obj);
1604         i915_gem_object_put(obj);
1605
1606         return 0;
1607 }
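
/*
 * A sketch of the matching userspace call once CPU writes are complete,
 * with `fd` and `handle` assumed as above:
 *
 *	struct drm_i915_gem_sw_finish finish = { .handle = handle };
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SW_FINISH, &finish);
 */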
1608
1609 static inline bool
1610 __vma_matches(struct vm_area_struct *vma, struct file *filp,
1611               unsigned long addr, unsigned long size)
1612 {
1613         if (vma->vm_file != filp)
1614                 return false;
1615
1616         return vma->vm_start == addr &&
1617                (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size);
1618 }
1619
1620 /**
1621  * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
1622  *                       it is mapped to.
1623  * @dev: drm device
1624  * @data: ioctl data blob
1625  * @file: drm file
1626  *
1627  * While the mapping holds a reference on the contents of the object, it doesn't
1628  * imply a ref on the object itself.
1629  *
1630  * IMPORTANT:
1631  *
1632  * DRM driver writers who look at this function as an example for how to do GEM
1633  * mmap support, please don't implement mmap support like here. The modern way
1634  * to implement DRM mmap support is with an mmap offset ioctl (like
1635  * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
1636  * That way debug tooling like valgrind will understand what's going on; hiding
1637  * the mmap call in a driver-private ioctl will break that. The i915 driver only
1638  * does cpu mmaps this way because we didn't know better.
1639  */
1640 int
1641 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1642                     struct drm_file *file)
1643 {
1644         struct drm_i915_gem_mmap *args = data;
1645         struct drm_i915_gem_object *obj;
1646         unsigned long addr;
1647
1648         if (args->flags & ~(I915_MMAP_WC))
1649                 return -EINVAL;
1650
1651         if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
1652                 return -ENODEV;
1653
1654         obj = i915_gem_object_lookup(file, args->handle);
1655         if (!obj)
1656                 return -ENOENT;
1657
1658         /* prime objects have no backing filp to GEM mmap
1659          * pages from.
1660          */
1661         if (!obj->base.filp) {
1662                 addr = -ENXIO;
1663                 goto err;
1664         }
1665
1666         if (range_overflows(args->offset, args->size, (u64)obj->base.size)) {
1667                 addr = -EINVAL;
1668                 goto err;
1669         }
1670
1671         addr = vm_mmap(obj->base.filp, 0, args->size,
1672                        PROT_READ | PROT_WRITE, MAP_SHARED,
1673                        args->offset);
1674         if (IS_ERR_VALUE(addr))
1675                 goto err;
1676
1677         if (args->flags & I915_MMAP_WC) {
1678                 struct mm_struct *mm = current->mm;
1679                 struct vm_area_struct *vma;
1680
1681                 if (down_write_killable(&mm->mmap_sem)) {
1682                         addr = -EINTR;
1683                         goto err;
1684                 }
1685                 vma = find_vma(mm, addr);
1686                 if (vma && __vma_matches(vma, obj->base.filp, addr, args->size))
1687                         vma->vm_page_prot =
1688                                 pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
1689                 else
1690                         addr = -ENOMEM;
1691                 up_write(&mm->mmap_sem);
1692                 if (IS_ERR_VALUE(addr))
1693                         goto err;
1694
1695                 /* This may race, but that's ok, it only gets set */
1696                 WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
1697         }
1698         i915_gem_object_put(obj);
1699
1700         args->addr_ptr = (u64)addr;
1701         return 0;
1702
1703 err:
1704         i915_gem_object_put(obj);
1705         return addr;
1706 }
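
/*
 * For illustration, a minimal sketch of the legacy CPU mmap path described
 * above, assuming an open DRM fd, an existing GEM handle and its size in
 * bytes:
 *
 *	struct drm_i915_gem_mmap arg = {
 *		.handle = handle,
 *		.offset = 0,
 *		.size = size,
 *		.flags = I915_MMAP_WC,
 *	};
 *	void *ptr = NULL;
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP, &arg) == 0)
 *		ptr = (void *)(uintptr_t)arg.addr_ptr;
 */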
1707
1708 static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
1709 {
1710         return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
1711 }
1712
1713 /**
1714  * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
1715  *
1716  * A history of the GTT mmap interface:
1717  *
1718  * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
1719  *     be aligned and suitable for fencing, and still fit into the available
1720  *     mappable space left by the pinned display objects. A classic problem
1721  *     we called the page-fault-of-doom where we would ping-pong between
1722  *     two objects that could not fit inside the GTT and so the memcpy
1723  *     would page one object in at the expense of the other between every
1724  *     single byte.
1725  *
1726  * 1 - Objects can be any size, and have any compatible fencing (X, Y, or none
1727  *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
1728  *     object is too large for the available space (or simply too large
1729  *     for the mappable aperture!), a view is created instead and faulted
1730  *     into userspace. (This view is aligned and sized appropriately for
1731  *     fenced access.)
1732  *
1733  * 2 - Recognise WC as a separate cache domain so that we can flush the
1734  *     delayed writes via GTT before performing direct access via WC.
1735  *
1736  * 3 - Remove implicit set-domain(GTT) and synchronisation on initial
1737  *     pagefault; swapin remains transparent.
1738  *
1739  * Restrictions:
1740  *
1741  *  * snoopable objects cannot be accessed via the GTT. Doing so can cause machine
1742  *    hangs on some architectures, corruption on others. An attempt to service
1743  *    a GTT page fault from a snoopable object will generate a SIGBUS.
1744  *
1745  *  * the object must be able to fit into RAM (physical memory, though not
1746  *    limited to the mappable aperture).
1747  *
1748  *
1749  * Caveats:
1750  *
1751  *  * a new GTT page fault will synchronize rendering from the GPU and flush
1752  *    all data to system memory. Subsequent access will not be synchronized.
1753  *
1754  *  * all mappings are revoked on runtime device suspend.
1755  *
1756  *  * there are only 8, 16 or 32 fence registers to share between all users
1757  *    (older machines require a fence register for display and blitter access
1758  *    as well). Contention of the fence registers will cause the previous users
1759  *    to be unmapped and any new access will generate new page faults.
1760  *
1761  *  * running out of memory while servicing a fault may generate a SIGBUS,
1762  *    rather than the expected SIGSEGV.
1763  */
1764 int i915_gem_mmap_gtt_version(void)
1765 {
1766         return 3;
1767 }
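
/*
 * Userspace discovers this feature level through the GETPARAM ioctl; a
 * sketch, assuming an open DRM fd:
 *
 *	int version = 0;
 *	drm_i915_getparam_t gp = {
 *		.param = I915_PARAM_MMAP_GTT_VERSION,
 *		.value = &version,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
 */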
1768
1769 static inline struct i915_ggtt_view
1770 compute_partial_view(const struct drm_i915_gem_object *obj,
1771                      pgoff_t page_offset,
1772                      unsigned int chunk)
1773 {
1774         struct i915_ggtt_view view;
1775
1776         if (i915_gem_object_is_tiled(obj))
1777                 chunk = roundup(chunk, tile_row_pages(obj));
1778
1779         view.type = I915_GGTT_VIEW_PARTIAL;
1780         view.partial.offset = rounddown(page_offset, chunk);
1781         view.partial.size =
1782                 min_t(unsigned int, chunk,
1783                       (obj->base.size >> PAGE_SHIFT) - view.partial.offset);
1784
1785         /* If the partial covers the entire object, just create a normal VMA. */
1786         if (chunk >= obj->base.size >> PAGE_SHIFT)
1787                 view.type = I915_GGTT_VIEW_NORMAL;
1788
1789         return view;
1790 }
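
/*
 * Worked example, assuming 4KiB pages: MIN_CHUNK_PAGES is 256 (1MiB), so a
 * fault at page_offset 1000 into an untiled 16MiB (4096 page) object yields
 * partial.offset = rounddown(1000, 256) = 768 and partial.size = 256, i.e. a
 * 1MiB window covering pages 768-1023 that contains the faulting page.
 */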
1791
1792 /**
1793  * i915_gem_fault - fault a page into the GTT
1794  * @vmf: fault info
1795  *
1796  * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
1797  * from userspace.  The fault handler takes care of binding the object to
1798  * the GTT (if needed), allocating and programming a fence register (again,
1799  * only if needed based on whether the old reg is still valid or the object
1800  * is tiled) and inserting a new PTE into the faulting process.
1801  *
1802  * Note that the faulting process may involve evicting existing objects
1803  * from the GTT and/or fence registers to make room.  So performance may
1804  * suffer if the GTT working set is large or there are few fence registers
1805  * left.
1806  *
1807  * The current feature set supported by i915_gem_fault() and thus GTT mmaps
1808  * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
1809  */
1810 vm_fault_t i915_gem_fault(struct vm_fault *vmf)
1811 {
1812 #define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT)
1813         struct vm_area_struct *area = vmf->vma;
1814         struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
1815         struct drm_device *dev = obj->base.dev;
1816         struct drm_i915_private *dev_priv = to_i915(dev);
1817         struct i915_ggtt *ggtt = &dev_priv->ggtt;
1818         bool write = area->vm_flags & VM_WRITE;
1819         intel_wakeref_t wakeref;
1820         struct i915_vma *vma;
1821         pgoff_t page_offset;
1822         int srcu;
1823         int ret;
1824
1825         /* Sanity check that we allow writing into this object */
1826         if (i915_gem_object_is_readonly(obj) && write)
1827                 return VM_FAULT_SIGBUS;
1828
1829         /* We don't use vmf->pgoff since that has the fake offset */
1830         page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
1831
1832         trace_i915_gem_object_fault(obj, page_offset, true, write);
1833
1834         ret = i915_gem_object_pin_pages(obj);
1835         if (ret)
1836                 goto err;
1837
1838         wakeref = intel_runtime_pm_get(dev_priv);
1839
1840         srcu = i915_reset_trylock(dev_priv);
1841         if (srcu < 0) {
1842                 ret = srcu;
1843                 goto err_rpm;
1844         }
1845
1846         ret = i915_mutex_lock_interruptible(dev);
1847         if (ret)
1848                 goto err_reset;
1849
1850         /* Access to snoopable pages through the GTT is incoherent. */
1851         if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) {
1852                 ret = -EFAULT;
1853                 goto err_unlock;
1854         }
1855
1856         /* Now pin it into the GTT as needed */
1857         vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1858                                        PIN_MAPPABLE |
1859                                        PIN_NONBLOCK |
1860                                        PIN_NONFAULT);
1861         if (IS_ERR(vma)) {
1862                 /* Use a partial view if it is bigger than available space */
1863                 struct i915_ggtt_view view =
1864                         compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
1865                 unsigned int flags;
1866
1867                 flags = PIN_MAPPABLE;
1868                 if (view.type == I915_GGTT_VIEW_NORMAL)
1869                         flags |= PIN_NONBLOCK; /* avoid warnings for pinned */
1870
1871                 /*
1872                  * Userspace is now writing through an untracked VMA, abandon
1873                  * all hope that the hardware is able to track future writes.
1874                  */
1875                 obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
1876
1877                 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
1878                 if (IS_ERR(vma) && !view.type) {
1879                         flags = PIN_MAPPABLE;
1880                         view.type = I915_GGTT_VIEW_PARTIAL;
1881                         vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
1882                 }
1883         }
1884         if (IS_ERR(vma)) {
1885                 ret = PTR_ERR(vma);
1886                 goto err_unlock;
1887         }
1888
1889         ret = i915_vma_pin_fence(vma);
1890         if (ret)
1891                 goto err_unpin;
1892
1893         /* Finally, remap it using the new GTT offset */
1894         ret = remap_io_mapping(area,
1895                                area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
1896                                (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT,
1897                                min_t(u64, vma->size, area->vm_end - area->vm_start),
1898                                &ggtt->iomap);
1899         if (ret)
1900                 goto err_fence;
1901
1902         /* Mark as being mmapped into userspace for later revocation */
1903         assert_rpm_wakelock_held(dev_priv);
1904         if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
1905                 list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
1906         GEM_BUG_ON(!obj->userfault_count);
1907
1908         i915_vma_set_ggtt_write(vma);
1909
1910 err_fence:
1911         i915_vma_unpin_fence(vma);
1912 err_unpin:
1913         __i915_vma_unpin(vma);
1914 err_unlock:
1915         mutex_unlock(&dev->struct_mutex);
1916 err_reset:
1917         i915_reset_unlock(dev_priv, srcu);
1918 err_rpm:
1919         intel_runtime_pm_put(dev_priv, wakeref);
1920         i915_gem_object_unpin_pages(obj);
1921 err:
1922         switch (ret) {
1923         case -EIO:
1924                 /*
1925                  * We eat errors when the gpu is terminally wedged to avoid
1926                  * userspace unduly crashing (gl has no provisions for mmaps to
1927                  * fail). But any other -EIO isn't ours (e.g. swap in failure)
1928                  * and so needs to be reported.
1929                  */
1930                 if (!i915_terminally_wedged(dev_priv))
1931                         return VM_FAULT_SIGBUS;
1932                 /* else: fall through */
1933         case -EAGAIN:
1934                 /*
1935                  * EAGAIN means the gpu is hung and we'll wait for the error
1936                  * handler to reset everything when re-faulting in
1937                  * i915_mutex_lock_interruptible.
1938                  */
1939         case 0:
1940         case -ERESTARTSYS:
1941         case -EINTR:
1942         case -EBUSY:
1943                 /*
1944                  * EBUSY is ok: this just means that another thread
1945                  * already did the job.
1946                  */
1947                 return VM_FAULT_NOPAGE;
1948         case -ENOMEM:
1949                 return VM_FAULT_OOM;
1950         case -ENOSPC:
1951         case -EFAULT:
1952                 return VM_FAULT_SIGBUS;
1953         default:
1954                 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
1955                 return VM_FAULT_SIGBUS;
1956         }
1957 }
1958
1959 static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
1960 {
1961         struct i915_vma *vma;
1962
1963         GEM_BUG_ON(!obj->userfault_count);
1964
1965         obj->userfault_count = 0;
1966         list_del(&obj->userfault_link);
1967         drm_vma_node_unmap(&obj->base.vma_node,
1968                            obj->base.dev->anon_inode->i_mapping);
1969
1970         for_each_ggtt_vma(vma, obj)
1971                 i915_vma_unset_userfault(vma);
1972 }
1973
1974 /**
1975  * i915_gem_release_mmap - remove physical page mappings
1976  * @obj: obj in question
1977  *
1978  * Preserve the reservation of the mmapping with the DRM core code, but
1979  * relinquish ownership of the pages back to the system.
1980  *
1981  * It is vital that we remove the page mapping if we have mapped a tiled
1982  * object through the GTT and then lose the fence register due to
1983  * resource pressure. Similarly if the object has been moved out of the
1984  * aperture, then pages mapped into userspace must be revoked. Removing the
1985  * mapping will then trigger a page fault on the next user access, allowing
1986  * fixup by i915_gem_fault().
1987  */
1988 void
1989 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
1990 {
1991         struct drm_i915_private *i915 = to_i915(obj->base.dev);
1992         intel_wakeref_t wakeref;
1993
1994         /* Serialisation between user GTT access and our code depends upon
1995          * revoking the CPU's PTE whilst the mutex is held. The next user
1996          * pagefault then has to wait until we release the mutex.
1997          *
1998          * Note that RPM complicates somewhat by adding an additional
1999          * requirement that operations to the GGTT be made holding the RPM
2000          * wakeref.
2001          */
2002         lockdep_assert_held(&i915->drm.struct_mutex);
2003         wakeref = intel_runtime_pm_get(i915);
2004
2005         if (!obj->userfault_count)
2006                 goto out;
2007
2008         __i915_gem_object_release_mmap(obj);
2009
2010         /* Ensure that the CPU's PTEs are revoked and there are no outstanding
2011          * memory transactions from userspace before we return. The TLB
2012          * flushing implied by changing the PTEs above *should* be
2013          * sufficient, an extra barrier here just provides us with a bit
2014          * of paranoid documentation about our requirement to serialise
2015          * memory writes before touching registers / GSM.
2016          */
2017         wmb();
2018
2019 out:
2020         intel_runtime_pm_put(i915, wakeref);
2021 }
2022
2023 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
2024 {
2025         struct drm_i915_gem_object *obj, *on;
2026         int i;
2027
2028         /*
2029          * Only called during RPM suspend. All users of the userfault_list
2030          * must be holding an RPM wakeref to ensure that this can not
2031          * run concurrently with themselves (and use the struct_mutex for
2032          * protection between themselves).
2033          */
2034
2035         list_for_each_entry_safe(obj, on,
2036                                  &dev_priv->mm.userfault_list, userfault_link)
2037                 __i915_gem_object_release_mmap(obj);
2038
2039         /* The fences will be lost when the device powers down. If any were
2040          * in use by hardware (i.e. they are pinned), we should not be powering
2041          * down! All other fences will be reacquired by the user upon waking.
2042          */
2043         for (i = 0; i < dev_priv->num_fence_regs; i++) {
2044                 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
2045
2046                 /* Ideally we want to assert that the fence register is not
2047                  * live at this point (i.e. that no piece of code will be
2048          * trying to write through fence + GTT, as that not only violates
2049                  * our tracking of activity and associated locking/barriers,
2050                  * but also is illegal given that the hw is powered down).
2051                  *
2052                  * Previously we used reg->pin_count as a "liveness" indicator.
2053                  * That is not sufficient, and we need a more fine-grained
2054                  * tool if we want to have a sanity check here.
2055                  */
2056
2057                 if (!reg->vma)
2058                         continue;
2059
2060                 GEM_BUG_ON(i915_vma_has_userfault(reg->vma));
2061                 reg->dirty = true;
2062         }
2063 }
2064
2065 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
2066 {
2067         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2068         int err;
2069
2070         err = drm_gem_create_mmap_offset(&obj->base);
2071         if (likely(!err))
2072                 return 0;
2073
2074         /* Attempt to reap some mmap space from dead objects */
2075         do {
2076                 err = i915_gem_wait_for_idle(dev_priv,
2077                                              I915_WAIT_INTERRUPTIBLE,
2078                                              MAX_SCHEDULE_TIMEOUT);
2079                 if (err)
2080                         break;
2081
2082                 i915_gem_drain_freed_objects(dev_priv);
2083                 err = drm_gem_create_mmap_offset(&obj->base);
2084                 if (!err)
2085                         break;
2086
2087         } while (flush_delayed_work(&dev_priv->gt.retire_work));
2088
2089         return err;
2090 }
2091
2092 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
2093 {
2094         drm_gem_free_mmap_offset(&obj->base);
2095 }
2096
2097 int
2098 i915_gem_mmap_gtt(struct drm_file *file,
2099                   struct drm_device *dev,
2100                   u32 handle,
2101                   u64 *offset)
2102 {
2103         struct drm_i915_gem_object *obj;
2104         int ret;
2105
2106         obj = i915_gem_object_lookup(file, handle);
2107         if (!obj)
2108                 return -ENOENT;
2109
2110         ret = i915_gem_object_create_mmap_offset(obj);
2111         if (ret == 0)
2112                 *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
2113
2114         i915_gem_object_put(obj);
2115         return ret;
2116 }
2117
2118 /**
2119  * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
2120  * @dev: DRM device
2121  * @data: GTT mapping ioctl data
2122  * @file: GEM object info
2123  *
2124  * Simply returns the fake offset to userspace so it can mmap it.
2125  * The mmap call will end up in drm_gem_mmap(), which will set things
2126  * up so we can get faults in the handler above.
2127  *
2128  * The fault handler will take care of binding the object into the GTT
2129  * (since it may have been evicted to make room for something), allocating
2130  * a fence register, and mapping the appropriate aperture address into
2131  * userspace.
2132  */
2133 int
2134 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
2135                         struct drm_file *file)
2136 {
2137         struct drm_i915_gem_mmap_gtt *args = data;
2138
2139         return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
2140 }
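
/*
 * The full userspace flow is the fake-offset ioctl followed by a plain
 * mmap() on the DRM fd; a sketch, with `fd`, `handle` and `size` assumed:
 *
 *	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *	void *ptr = MAP_FAILED;
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg) == 0)
 *		ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
 *			   MAP_SHARED, fd, arg.offset);
 */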
2141
2142 /* Immediately discard the backing storage */
2143 static void
2144 i915_gem_object_truncate(struct drm_i915_gem_object *obj)
2145 {
2146         i915_gem_object_free_mmap_offset(obj);
2147
2148         if (obj->base.filp == NULL)
2149                 return;
2150
2151         /* Our goal here is to return as much of the memory as
2152          * possible back to the system as we are called from the OOM path.
2153          * To do this we must instruct the shmfs to drop all of its
2154          * backing pages, *now*.
2155          */
2156         shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
2157         obj->mm.madv = __I915_MADV_PURGED;
2158         obj->mm.pages = ERR_PTR(-EFAULT);
2159 }
2160
2161 /* Try to discard unwanted pages */
2162 void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
2163 {
2164         struct address_space *mapping;
2165
2166         lockdep_assert_held(&obj->mm.lock);
2167         GEM_BUG_ON(i915_gem_object_has_pages(obj));
2168
2169         switch (obj->mm.madv) {
2170         case I915_MADV_DONTNEED:
2171                 i915_gem_object_truncate(obj);
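                /* fall through */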
2172         case __I915_MADV_PURGED:
2173                 return;
2174         }
2175
2176         if (obj->base.filp == NULL)
2177                 return;
2178
2179         mapping = obj->base.filp->f_mapping;
2180         invalidate_mapping_pages(mapping, 0, (loff_t)-1);
2181 }
2182
2183 /*
2184  * Move pages to appropriate lru and release the pagevec, decrementing the
2185  * ref count of those pages.
2186  */
2187 static void check_release_pagevec(struct pagevec *pvec)
2188 {
2189         check_move_unevictable_pages(pvec);
2190         __pagevec_release(pvec);
2191         cond_resched();
2192 }
2193
2194 static void
2195 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
2196                               struct sg_table *pages)
2197 {
2198         struct sgt_iter sgt_iter;
2199         struct pagevec pvec;
2200         struct page *page;
2201
2202         __i915_gem_object_release_shmem(obj, pages, true);
2203
2204         i915_gem_gtt_finish_pages(obj, pages);
2205
2206         if (i915_gem_object_needs_bit17_swizzle(obj))
2207                 i915_gem_object_save_bit_17_swizzle(obj, pages);
2208
2209         mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);
2210
2211         pagevec_init(&pvec);
2212         for_each_sgt_page(page, sgt_iter, pages) {
2213                 if (obj->mm.dirty)
2214                         set_page_dirty(page);
2215
2216                 if (obj->mm.madv == I915_MADV_WILLNEED)
2217                         mark_page_accessed(page);
2218
2219                 if (!pagevec_add(&pvec, page))
2220                         check_release_pagevec(&pvec);
2221         }
2222         if (pagevec_count(&pvec))
2223                 check_release_pagevec(&pvec);
2224         obj->mm.dirty = false;
2225
2226         sg_free_table(pages);
2227         kfree(pages);
2228 }
2229
2230 static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
2231 {
2232         struct radix_tree_iter iter;
2233         void __rcu **slot;
2234
2235         rcu_read_lock();
2236         radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
2237                 radix_tree_delete(&obj->mm.get_page.radix, iter.index);
2238         rcu_read_unlock();
2239 }
2240
2241 static struct sg_table *
2242 __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
2243 {
2244         struct drm_i915_private *i915 = to_i915(obj->base.dev);
2245         struct sg_table *pages;
2246
2247         pages = fetch_and_zero(&obj->mm.pages);
2248         if (IS_ERR_OR_NULL(pages))
2249                 return pages;
2250
2251         spin_lock(&i915->mm.obj_lock);
2252         list_del(&obj->mm.link);
2253         spin_unlock(&i915->mm.obj_lock);
2254
2255         if (obj->mm.mapping) {
2256                 void *ptr;
2257
2258                 ptr = page_mask_bits(obj->mm.mapping);
2259                 if (is_vmalloc_addr(ptr))
2260                         vunmap(ptr);
2261                 else
2262                         kunmap(kmap_to_page(ptr));
2263
2264                 obj->mm.mapping = NULL;
2265         }
2266
2267         __i915_gem_object_reset_page_iter(obj);
2268         obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
2269
2270         return pages;
2271 }
2272
2273 int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
2274                                 enum i915_mm_subclass subclass)
2275 {
2276         struct sg_table *pages;
2277         int ret;
2278
2279         if (i915_gem_object_has_pinned_pages(obj))
2280                 return -EBUSY;
2281
2282         GEM_BUG_ON(obj->bind_count);
2283
2284         /* May be called by shrinker from within get_pages() (on another bo) */
2285         mutex_lock_nested(&obj->mm.lock, subclass);
2286         if (unlikely(atomic_read(&obj->mm.pages_pin_count))) {
2287                 ret = -EBUSY;
2288                 goto unlock;
2289         }
2290
2291         /*
2292          * ->put_pages might need to allocate memory for the bit17 swizzle
2293          * array, hence protect them from being reaped by removing them from gtt
2294          * lists early.
2295          */
2296         pages = __i915_gem_object_unset_pages(obj);
2297
2298         /*
2299          * XXX Temporary hijinx to avoid updating all backends to handle
2300          * NULL pages. In the future, when we have more asynchronous
2301          * get_pages backends we should be better able to handle the
2302          * cancellation of the async task in a more uniform manner.
2303          */
2304         if (!pages && !i915_gem_object_needs_async_cancel(obj))
2305                 pages = ERR_PTR(-EINVAL);
2306
2307         if (!IS_ERR(pages))
2308                 obj->ops->put_pages(obj, pages);
2309
2310         ret = 0;
2311 unlock:
2312         mutex_unlock(&obj->mm.lock);
2313
2314         return ret;
2315 }
2316
2317 bool i915_sg_trim(struct sg_table *orig_st)
2318 {
2319         struct sg_table new_st;
2320         struct scatterlist *sg, *new_sg;
2321         unsigned int i;
2322
2323         if (orig_st->nents == orig_st->orig_nents)
2324                 return false;
2325
2326         if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN))
2327                 return false;
2328
2329         new_sg = new_st.sgl;
2330         for_each_sg(orig_st->sgl, sg, orig_st->nents, i) {
2331                 sg_set_page(new_sg, sg_page(sg), sg->length, 0);
2332                 sg_dma_address(new_sg) = sg_dma_address(sg);
2333                 sg_dma_len(new_sg) = sg_dma_len(sg);
2334
2335                 new_sg = sg_next(new_sg);
2336         }
2337         GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */
2338
2339         sg_free_table(orig_st);
2340
2341         *orig_st = new_st;
2342         return true;
2343 }
2344
2345 static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2346 {
2347         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2348         const unsigned long page_count = obj->base.size / PAGE_SIZE;
2349         unsigned long i;
2350         struct address_space *mapping;
2351         struct sg_table *st;
2352         struct scatterlist *sg;
2353         struct sgt_iter sgt_iter;
2354         struct page *page;
2355         unsigned long last_pfn = 0;     /* suppress gcc warning */
2356         unsigned int max_segment = i915_sg_segment_size();
2357         unsigned int sg_page_sizes;
2358         struct pagevec pvec;
2359         gfp_t noreclaim;
2360         int ret;
2361
2362         /*
2363          * Assert that the object is not currently in any GPU domain. As it
2364          * wasn't in the GTT, there shouldn't be any way it could have been in
2365          * a GPU cache
2366          */
2367         GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
2368         GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
2369
2370         /*
2371          * If there's no chance of allocating enough pages for the whole
2372          * object, bail early.
2373          */
2374         if (page_count > totalram_pages())
2375                 return -ENOMEM;
2376
2377         st = kmalloc(sizeof(*st), GFP_KERNEL);
2378         if (st == NULL)
2379                 return -ENOMEM;
2380
2381 rebuild_st:
2382         if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
2383                 kfree(st);
2384                 return -ENOMEM;
2385         }
2386
2387         /*
2388          * Get the list of pages out of our struct file.  They'll be pinned
2389          * at this point until we release them.
2390          *
2391          * Fail silently without starting the shrinker
2392          */
2393         mapping = obj->base.filp->f_mapping;
2394         mapping_set_unevictable(mapping);
2395         noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
2396         noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
2397
2398         sg = st->sgl;
2399         st->nents = 0;
2400         sg_page_sizes = 0;
2401         for (i = 0; i < page_count; i++) {
2402                 const unsigned int shrink[] = {
2403                         I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE,
2404                         0,
2405                 }, *s = shrink;
2406                 gfp_t gfp = noreclaim;
2407
2408                 do {
2409                         cond_resched();
2410                         page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2411                         if (!IS_ERR(page))
2412                                 break;
2413
2414                         if (!*s) {
2415                                 ret = PTR_ERR(page);
2416                                 goto err_sg;
2417                         }
2418
2419                         i915_gem_shrink(dev_priv, 2 * page_count, NULL, *s++);
2420
2421                         /*
2422                          * We've tried hard to allocate the memory by reaping
2423                          * our own buffer, now let the real VM do its job and
2424                          * go down in flames if truly OOM.
2425                          *
2426                          * However, since graphics tend to be disposable,
2427                          * defer the oom here by reporting the ENOMEM back
2428                          * to userspace.
2429                          */
2430                         if (!*s) {
2431                                 /* reclaim and warn, but no oom */
2432                                 gfp = mapping_gfp_mask(mapping);
2433
2434                                 /*
2435                                  * Our bo are always dirty and so we require
2436                                  * kswapd to reclaim our pages (direct reclaim
2437                                  * does not effectively begin pageout of our
2438                                  * buffers on its own). However, direct reclaim
2439                                  * only waits for kswapd when under allocation
2440                                  * congestion. So as a result __GFP_RECLAIM is
2441                                  * unreliable and fails to actually reclaim our
2442                                  * dirty pages -- unless you try over and over
2443                                  * again with !__GFP_NORETRY. However, we still
2444                                  * want to fail this allocation rather than
2445                                  * trigger the out-of-memory killer and for
2446                                  * this we want __GFP_RETRY_MAYFAIL.
2447                                  */
2448                                 gfp |= __GFP_RETRY_MAYFAIL;
2449                         }
2450                 } while (1);
2451
2452                 if (!i ||
2453                     sg->length >= max_segment ||
2454                     page_to_pfn(page) != last_pfn + 1) {
2455                         if (i) {
2456                                 sg_page_sizes |= sg->length;
2457                                 sg = sg_next(sg);
2458                         }
2459                         st->nents++;
2460                         sg_set_page(sg, page, PAGE_SIZE, 0);
2461                 } else {
2462                         sg->length += PAGE_SIZE;
2463                 }
2464                 last_pfn = page_to_pfn(page);
2465
2466                 /* Check that the i965g/gm workaround works. */
2467                 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
2468         }
2469         if (sg) { /* loop terminated early; short sg table */
2470                 sg_page_sizes |= sg->length;
2471                 sg_mark_end(sg);
2472         }
2473
2474         /* Trim unused sg entries to avoid wasting memory. */
2475         i915_sg_trim(st);
2476
2477         ret = i915_gem_gtt_prepare_pages(obj, st);
2478         if (ret) {
2479                 /*
2480                  * DMA remapping failed? One possible cause is that
2481                  * it could not reserve enough large entries; asking
2482                  * for PAGE_SIZE chunks instead may be helpful.
2483                  */
2484                 if (max_segment > PAGE_SIZE) {
2485                         for_each_sgt_page(page, sgt_iter, st)
2486                                 put_page(page);
2487                         sg_free_table(st);
2488
2489                         max_segment = PAGE_SIZE;
2490                         goto rebuild_st;
2491                 } else {
2492                         dev_warn(&dev_priv->drm.pdev->dev,
2493                                  "Failed to DMA remap %lu pages\n",
2494                                  page_count);
2495                         goto err_pages;
2496                 }
2497         }
2498
2499         if (i915_gem_object_needs_bit17_swizzle(obj))
2500                 i915_gem_object_do_bit_17_swizzle(obj, st);
2501
2502         __i915_gem_object_set_pages(obj, st, sg_page_sizes);
2503
2504         return 0;
2505
2506 err_sg:
2507         sg_mark_end(sg);
2508 err_pages:
2509         mapping_clear_unevictable(mapping);
2510         pagevec_init(&pvec);
2511         for_each_sgt_page(page, sgt_iter, st) {
2512                 if (!pagevec_add(&pvec, page))
2513                         check_release_pagevec(&pvec);
2514         }
2515         if (pagevec_count(&pvec))
2516                 check_release_pagevec(&pvec);
2517         sg_free_table(st);
2518         kfree(st);
2519
2520         /*
2521          * shmemfs first checks if there is enough memory to allocate the page
2522          * and reports ENOSPC should there be insufficient, along with the usual
2523          * ENOMEM for a genuine allocation failure.
2524          *
2525          * We use ENOSPC in our driver to mean that we have run out of aperture
2526          * space and so want to translate the error from shmemfs back to our
2527          * usual understanding of ENOMEM.
2528          */
2529         if (ret == -ENOSPC)
2530                 ret = -ENOMEM;
2531
2532         return ret;
2533 }
2534
2535 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
2536                                  struct sg_table *pages,
2537                                  unsigned int sg_page_sizes)
2538 {
2539         struct drm_i915_private *i915 = to_i915(obj->base.dev);
2540         unsigned long supported = INTEL_INFO(i915)->page_sizes;
2541         int i;
2542
2543         lockdep_assert_held(&obj->mm.lock);
2544
2545         /* Make the pages coherent with the GPU (flushing any swapin). */
2546         if (obj->cache_dirty) {
2547                 obj->write_domain = 0;
2548                 if (i915_gem_object_has_struct_page(obj))
2549                         drm_clflush_sg(pages);
2550                 obj->cache_dirty = false;
2551         }
2552
2553         obj->mm.get_page.sg_pos = pages->sgl;
2554         obj->mm.get_page.sg_idx = 0;
2555
2556         obj->mm.pages = pages;
2557
2558         if (i915_gem_object_is_tiled(obj) &&
2559             i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
2560                 GEM_BUG_ON(obj->mm.quirked);
2561                 __i915_gem_object_pin_pages(obj);
2562                 obj->mm.quirked = true;
2563         }
2564
2565         GEM_BUG_ON(!sg_page_sizes);
2566         obj->mm.page_sizes.phys = sg_page_sizes;
2567
2568         /*
2569          * Calculate the supported page-sizes which fit into the given
2570          * sg_page_sizes. This will give us the page-sizes which we may be able
2571          * to use opportunistically when later inserting into the GTT. For
2572          * example if phys=2G, then in theory we should be able to use 1G, 2M,
2573          * 64K or 4K pages, although in practice this will depend on a number of
2574          * other factors.
2575          */
2576         obj->mm.page_sizes.sg = 0;
2577         for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
2578                 if (obj->mm.page_sizes.phys & ~0u << i)
2579                         obj->mm.page_sizes.sg |= BIT(i);
2580         }
2581         GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg));
2582
2583         spin_lock(&i915->mm.obj_lock);
2584         list_add(&obj->mm.link, &i915->mm.unbound_list);
2585         spin_unlock(&i915->mm.obj_lock);
2586 }
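
/*
 * Worked example of the loop above, assuming a platform supporting 4K, 64K
 * and 2M pages: phys = 2M | 64K yields sg = 2M | 64K | 4K (every supported
 * size up to the largest bit present in phys), whereas phys = 64K | 4K
 * yields only sg = 64K | 4K since no chunk of 2M or larger was allocated.
 */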
2587
2588 static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2589 {
2590         int err;
2591
2592         if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) {
2593                 DRM_DEBUG("Attempting to obtain a purgeable object\n");
2594                 return -EFAULT;
2595         }
2596
2597         err = obj->ops->get_pages(obj);
2598         GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj));
2599
2600         return err;
2601 }
2602
2603 /* Ensure that the associated pages are gathered from the backing storage
2604  * and pinned into our object. i915_gem_object_pin_pages() may be called
2605  * multiple times before they are released by a single call to
2606  * i915_gem_object_unpin_pages() - once the pages are no longer referenced
2607  * either as a result of memory pressure (reaping pages under the shrinker)
2608  * or as the object is itself released.
2609  */
2610 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2611 {
2612         int err;
2613
2614         err = mutex_lock_interruptible(&obj->mm.lock);
2615         if (err)
2616                 return err;
2617
2618         if (unlikely(!i915_gem_object_has_pages(obj))) {
2619                 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
2620
2621                 err = ____i915_gem_object_get_pages(obj);
2622                 if (err)
2623                         goto unlock;
2624
2625                 smp_mb__before_atomic();
2626         }
2627         atomic_inc(&obj->mm.pages_pin_count);
2628
2629 unlock:
2630         mutex_unlock(&obj->mm.lock);
2631         return err;
2632 }
2633
2634 /* The 'mapping' part of i915_gem_object_pin_map() below */
2635 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
2636                                  enum i915_map_type type)
2637 {
2638         unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
2639         struct sg_table *sgt = obj->mm.pages;
2640         struct sgt_iter sgt_iter;
2641         struct page *page;
2642         struct page *stack_pages[32];
2643         struct page **pages = stack_pages;
2644         unsigned long i = 0;
2645         pgprot_t pgprot;
2646         void *addr;
2647
2648         /* A single page can always be kmapped */
2649         if (n_pages == 1 && type == I915_MAP_WB)
2650                 return kmap(sg_page(sgt->sgl));
2651
2652         if (n_pages > ARRAY_SIZE(stack_pages)) {
2653                 /* Too big for stack -- allocate temporary array instead */
2654                 pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
2655                 if (!pages)
2656                         return NULL;
2657         }
2658
2659         for_each_sgt_page(page, sgt_iter, sgt)
2660                 pages[i++] = page;
2661
2662         /* Check that we have the expected number of pages */
2663         GEM_BUG_ON(i != n_pages);
2664
2665         switch (type) {
2666         default:
2667                 MISSING_CASE(type);
2668                 /* fallthrough to use PAGE_KERNEL anyway */
2669         case I915_MAP_WB:
2670                 pgprot = PAGE_KERNEL;
2671                 break;
2672         case I915_MAP_WC:
2673                 pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
2674                 break;
2675         }
2676         addr = vmap(pages, n_pages, 0, pgprot);
2677
2678         if (pages != stack_pages)
2679                 kvfree(pages);
2680
2681         return addr;
2682 }
2683
2684 /* get, pin, and map the pages of the object into kernel space */
2685 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
2686                               enum i915_map_type type)
2687 {
2688         enum i915_map_type has_type;
2689         bool pinned;
2690         void *ptr;
2691         int ret;
2692
2693         if (unlikely(!i915_gem_object_has_struct_page(obj)))
2694                 return ERR_PTR(-ENXIO);
2695
2696         ret = mutex_lock_interruptible(&obj->mm.lock);
2697         if (ret)
2698                 return ERR_PTR(ret);
2699
2700         pinned = !(type & I915_MAP_OVERRIDE);
2701         type &= ~I915_MAP_OVERRIDE;
2702
2703         if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
2704                 if (unlikely(!i915_gem_object_has_pages(obj))) {
2705                         GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
2706
2707                         ret = ____i915_gem_object_get_pages(obj);
2708                         if (ret)
2709                                 goto err_unlock;
2710
2711                         smp_mb__before_atomic();
2712                 }
2713                 atomic_inc(&obj->mm.pages_pin_count);
2714                 pinned = false;
2715         }
2716         GEM_BUG_ON(!i915_gem_object_has_pages(obj));
2717
2718         ptr = page_unpack_bits(obj->mm.mapping, &has_type);
2719         if (ptr && has_type != type) {
2720                 if (pinned) {
2721                         ret = -EBUSY;
2722                         goto err_unpin;
2723                 }
2724
2725                 if (is_vmalloc_addr(ptr))
2726                         vunmap(ptr);
2727                 else
2728                         kunmap(kmap_to_page(ptr));
2729
2730                 ptr = obj->mm.mapping = NULL;
2731         }
2732
2733         if (!ptr) {
2734                 ptr = i915_gem_object_map(obj, type);
2735                 if (!ptr) {
2736                         ret = -ENOMEM;
2737                         goto err_unpin;
2738                 }
2739
2740                 obj->mm.mapping = page_pack_bits(ptr, type);
2741         }
2742
2743 out_unlock:
2744         mutex_unlock(&obj->mm.lock);
2745         return ptr;
2746
2747 err_unpin:
2748         atomic_dec(&obj->mm.pages_pin_count);
2749 err_unlock:
2750         ptr = ERR_PTR(ret);
2751         goto out_unlock;
2752 }
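
/*
 * A sketch of the typical in-kernel pairing; the returned pointer remains
 * valid until the matching unpin:
 *
 *	void *vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
 *	if (!IS_ERR(vaddr)) {
 *		memcpy(vaddr, data, len);
 *		i915_gem_object_unpin_map(obj);
 *	}
 */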
2753
2754 void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj,
2755                                  unsigned long offset,
2756                                  unsigned long size)
2757 {
2758         enum i915_map_type has_type;
2759         void *ptr;
2760
2761         GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
2762         GEM_BUG_ON(range_overflows_t(typeof(obj->base.size),
2763                                      offset, size, obj->base.size));
2764
2765         obj->mm.dirty = true;
2766
2767         if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)
2768                 return;
2769
2770         ptr = page_unpack_bits(obj->mm.mapping, &has_type);
2771         if (has_type == I915_MAP_WC)
2772                 return;
2773
2774         drm_clflush_virt_range(ptr + offset, size);
2775         if (size == obj->base.size) {
2776                 obj->write_domain &= ~I915_GEM_DOMAIN_CPU;
2777                 obj->cache_dirty = false;
2778         }
2779 }
2780
2781 static int
2782 i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
2783                            const struct drm_i915_gem_pwrite *arg)
2784 {
2785         struct address_space *mapping = obj->base.filp->f_mapping;
2786         char __user *user_data = u64_to_user_ptr(arg->data_ptr);
2787         u64 remain, offset;
2788         unsigned int pg;
2789
2790         /* Before we instantiate/pin the backing store for our use, we
2791          * can prepopulate the shmemfs filp efficiently using a write into
2792          * the pagecache. We avoid the penalty of instantiating all the
2793          * pages, important if the user is just writing to a few and never
2794          * uses the object on the GPU, and using a direct write into shmemfs
2795          * allows it to avoid the cost of retrieving a page (either swapin
2796          * or clearing-before-use) before it is overwritten.
2797          */
2798         if (i915_gem_object_has_pages(obj))
2799                 return -ENODEV;
2800
2801         if (obj->mm.madv != I915_MADV_WILLNEED)
2802                 return -EFAULT;
2803
2804         /* Before the pages are instantiated the object is treated as being
2805          * in the CPU domain. The pages will be clflushed as required before
2806          * use, and we can freely write into the pages directly. If userspace
2807          * races pwrite with any other operation, corruption will ensue -
2808          * that is userspace's prerogative!
2809          */
2810
2811         remain = arg->size;
2812         offset = arg->offset;
2813         pg = offset_in_page(offset);
2814
2815         do {
2816                 unsigned int len, unwritten;
2817                 struct page *page;
2818                 void *data, *vaddr;
2819                 int err;
2820
2821                 len = PAGE_SIZE - pg;
2822                 if (len > remain)
2823                         len = remain;
2824
2825                 err = pagecache_write_begin(obj->base.filp, mapping,
2826                                             offset, len, 0,
2827                                             &page, &data);
2828                 if (err < 0)
2829                         return err;
2830
2831                 vaddr = kmap(page);
2832                 unwritten = copy_from_user(vaddr + pg, user_data, len);
2833                 kunmap(page);
2834
2835                 err = pagecache_write_end(obj->base.filp, mapping,
2836                                           offset, len, len - unwritten,
2837                                           page, data);
2838                 if (err < 0)
2839                         return err;
2840
2841                 if (unwritten)
2842                         return -EFAULT;
2843
2844                 remain -= len;
2845                 user_data += len;
2846                 offset += len;
2847                 pg = 0;
2848         } while (remain);
2849
2850         return 0;
2851 }
2852
2853 static void
2854 i915_gem_retire_work_handler(struct work_struct *work)
2855 {
2856         struct drm_i915_private *dev_priv =
2857                 container_of(work, typeof(*dev_priv), gt.retire_work.work);
2858         struct drm_device *dev = &dev_priv->drm;
2859
2860         /* Come back later if the device is busy... */
2861         if (mutex_trylock(&dev->struct_mutex)) {
2862                 i915_retire_requests(dev_priv);
2863                 mutex_unlock(&dev->struct_mutex);
2864         }
2865
2866         /*
2867          * Keep the retire handler running until we are finally idle.
2868          * We do not need to do this test under locking as in the worst-case
2869          * we queue the retire worker once too often.
2870          */
2871         if (READ_ONCE(dev_priv->gt.awake))
2872                 queue_delayed_work(dev_priv->wq,
2873                                    &dev_priv->gt.retire_work,
2874                                    round_jiffies_up_relative(HZ));
2875 }
2876
2877 static bool switch_to_kernel_context_sync(struct drm_i915_private *i915,
2878                                           unsigned long mask)
2879 {
2880         bool result = true;
2881
2882         /*
2883          * Even if we fail to switch, give whatever is running a small chance
2884          * to save itself before we report the failure. Yes, this may be a
2885          * false positive due to e.g. ENOMEM, caveat emptor!
2886          */
2887         if (i915_gem_switch_to_kernel_context(i915, mask))
2888                 result = false;
2889
2890         if (i915_gem_wait_for_idle(i915,
2891                                    I915_WAIT_LOCKED |
2892                                    I915_WAIT_FOR_IDLE_BOOST,
2893                                    I915_GEM_IDLE_TIMEOUT))
2894                 result = false;
2895
2896         if (!result) {
2897                 if (i915_modparams.reset) { /* XXX hide warning from gem_eio */
2898                         dev_err(i915->drm.dev,
2899                                 "Failed to idle engines, declaring wedged!\n");
2900                         GEM_TRACE_DUMP();
2901                 }
2902
2903                 /* Forcibly cancel outstanding work and leave the gpu quiet. */
2904                 i915_gem_set_wedged(i915);
2905         }
2906
2907         i915_retire_requests(i915); /* ensure we flush after wedging */
2908         return result;
2909 }
2910
2911 static bool load_power_context(struct drm_i915_private *i915)
2912 {
2913         /* Force loading the kernel context on all engines */
2914         if (!switch_to_kernel_context_sync(i915, ALL_ENGINES))
2915                 return false;
2916
2917         /*
2918          * Immediately park the GPU so that we enable powersaving and
2919          * treat it as idle. The next time we issue a request, we will
2920          * unpark and start using the engine->pinned_default_state, otherwise
2921          * it is in limbo and an early reset may fail.
2922          */
2923         __i915_gem_park(i915);
2924
2925         return true;
2926 }
2927
2928 static void
2929 i915_gem_idle_work_handler(struct work_struct *work)
2930 {
2931         struct drm_i915_private *i915 =
2932                 container_of(work, typeof(*i915), gt.idle_work.work);
2933         bool rearm_hangcheck;
2934
2935         if (!READ_ONCE(i915->gt.awake))
2936                 return;
2937
2938         if (READ_ONCE(i915->gt.active_requests))
2939                 return;
2940
2941         rearm_hangcheck =
2942                 cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
2943
2944         if (!mutex_trylock(&i915->drm.struct_mutex)) {
2945                 /* Currently busy, come back later */
2946                 mod_delayed_work(i915->wq,
2947                                  &i915->gt.idle_work,
2948                                  msecs_to_jiffies(50));
2949                 goto out_rearm;
2950         }
2951
2952         /*
2953          * Flush out the last user context, leaving only the pinned
2954          * kernel context resident. Should anything unfortunate happen
2955          * while we are idle (such as the GPU being power cycled), no users
2956          * will be harmed.
2957          */
2958         if (!work_pending(&i915->gt.idle_work.work) &&
2959             !i915->gt.active_requests) {
2960                 ++i915->gt.active_requests; /* don't requeue idle */
2961
2962                 switch_to_kernel_context_sync(i915, i915->gt.active_engines);
2963
2964                 if (!--i915->gt.active_requests) {
2965                         __i915_gem_park(i915);
2966                         rearm_hangcheck = false;
2967                 }
2968         }
2969
2970         mutex_unlock(&i915->drm.struct_mutex);
2971
2972 out_rearm:
2973         if (rearm_hangcheck) {
2974                 GEM_BUG_ON(!i915->gt.awake);
2975                 i915_queue_hangcheck(i915);
2976         }
2977 }
2978
2979 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
2980 {
2981         struct drm_i915_private *i915 = to_i915(gem->dev);
2982         struct drm_i915_gem_object *obj = to_intel_bo(gem);
2983         struct drm_i915_file_private *fpriv = file->driver_priv;
2984         struct i915_lut_handle *lut, *ln;
2985
2986         mutex_lock(&i915->drm.struct_mutex);
2987
2988         list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
2989                 struct i915_gem_context *ctx = lut->ctx;
2990                 struct i915_vma *vma;
2991
2992                 GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF));
2993                 if (ctx->file_priv != fpriv)
2994                         continue;
2995
2996                 vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
2997                 GEM_BUG_ON(vma->obj != obj);
2998
2999                 /* We allow the process to have multiple handles to the same
3000                  * vma, in the same fd namespace, by virtue of flink/open.
3001                  */
3002                 GEM_BUG_ON(!vma->open_count);
3003                 if (!--vma->open_count && !i915_vma_is_ggtt(vma))
3004                         i915_vma_close(vma);
3005
3006                 list_del(&lut->obj_link);
3007                 list_del(&lut->ctx_link);
3008
3009                 i915_lut_handle_free(lut);
3010                 __i915_gem_object_release_unless_active(obj);
3011         }
3012
3013         mutex_unlock(&i915->drm.struct_mutex);
3014 }
3015
3016 static unsigned long to_wait_timeout(s64 timeout_ns)
3017 {
3018         if (timeout_ns < 0)
3019                 return MAX_SCHEDULE_TIMEOUT;
3020
3021         if (timeout_ns == 0)
3022                 return 0;
3023
3024         return nsecs_to_jiffies_timeout(timeout_ns);
3025 }
3026
3027 /**
3028  * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
3029  * @dev: drm device pointer
3030  * @data: ioctl data blob
3031  * @file: drm file pointer
3032  *
3033  * Returns 0 if successful, else an error is returned with the remaining time in
3034  * the timeout parameter.
3035  *  -ETIME: object is still busy after timeout
3036  *  -ERESTARTSYS: signal interrupted the wait
3037  *  -ENOENT: object doesn't exist
3038  * Also possible, but rare:
3039  *  -EAGAIN: incomplete, restart syscall
3040  *  -ENOMEM: out of memory
3041  *  -ENODEV: Internal IRQ fail
3042  *  -E?: The add request failed
3043  *
3044  * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
3045  * non-zero timeout parameter the wait ioctl will wait for the given number of
3046  * nanoseconds on an object becoming unbusy. Since the wait itself does so
3047  * without holding struct_mutex the object may become re-busied before this
3048  * function completes. A similar but shorter race condition exists in the busy
3049  * ioctl.
3050  */
3051 int
3052 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3053 {
3054         struct drm_i915_gem_wait *args = data;
3055         struct drm_i915_gem_object *obj;
3056         ktime_t start;
3057         long ret;
3058
3059         if (args->flags != 0)
3060                 return -EINVAL;
3061
3062         obj = i915_gem_object_lookup(file, args->bo_handle);
3063         if (!obj)
3064                 return -ENOENT;
3065
3066         start = ktime_get();
3067
3068         ret = i915_gem_object_wait(obj,
3069                                    I915_WAIT_INTERRUPTIBLE |
3070                                    I915_WAIT_PRIORITY |
3071                                    I915_WAIT_ALL,
3072                                    to_wait_timeout(args->timeout_ns));
3073
3074         if (args->timeout_ns > 0) {
3075                 args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
3076                 if (args->timeout_ns < 0)
3077                         args->timeout_ns = 0;
3078
3079                 /*
3080                  * Apparently ktime isn't accurate enough and occasionally has a
3081                  * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
3082                  * things up to make the test happy. We allow up to 1 jiffy.
3083                  *
3084                  * This is a regression from the timespec->ktime conversion.
3085                  */
3086                 if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns))
3087                         args->timeout_ns = 0;
3088
3089                 /* Asked to wait beyond the jiffie/scheduler precision? */
3090                 if (ret == -ETIME && args->timeout_ns)
3091                         ret = -EAGAIN;
3092         }
3093
3094         i915_gem_object_put(obj);
3095         return ret;
3096 }
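/*
 * Illustrative userspace sketch (not part of the driver): calling
 * DRM_IOCTL_I915_GEM_WAIT as documented above. Assumes an open DRM fd, a
 * valid GEM handle and libdrm's drmIoctl(); the helper name and header
 * paths are illustrative only. On a timed wait, wait.timeout_ns is updated
 * with the time remaining.
 *
 *	#include <errno.h>
 *	#include <stdint.h>
 *	#include <xf86drm.h>
 *	#include <drm/i915_drm.h>
 *
 *	static int wait_bo_nsec(int fd, uint32_t handle, int64_t timeout_ns)
 *	{
 *		struct drm_i915_gem_wait wait = {
 *			.bo_handle = handle,
 *			.flags = 0,			// must be zero
 *			.timeout_ns = timeout_ns,	// <0 waits forever, 0 polls
 *		};
 *
 *		if (drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait))
 *			return -errno;	// ETIME: still busy after timeout
 *
 *		return 0;	// object idle (or became idle within the timeout)
 *	}
 */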
3097
3098 static int wait_for_engines(struct drm_i915_private *i915)
3099 {
3100         if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) {
3101                 dev_err(i915->drm.dev,
3102                         "Failed to idle engines, declaring wedged!\n");
3103                 GEM_TRACE_DUMP();
3104                 i915_gem_set_wedged(i915);
3105                 return -EIO;
3106         }
3107
3108         return 0;
3109 }
3110
3111 static long
3112 wait_for_timelines(struct drm_i915_private *i915,
3113                    unsigned int flags, long timeout)
3114 {
3115         struct i915_gt_timelines *gt = &i915->gt.timelines;
3116         struct i915_timeline *tl;
3117
3118         if (!READ_ONCE(i915->gt.active_requests))
3119                 return timeout;
3120
3121         mutex_lock(&gt->mutex);
3122         list_for_each_entry(tl, &gt->active_list, link) {
3123                 struct i915_request *rq;
3124
3125                 rq = i915_active_request_get_unlocked(&tl->last_request);
3126                 if (!rq)
3127                         continue;
3128
3129                 mutex_unlock(&gt->mutex);
3130
3131                 /*
3132                  * "Race-to-idle".
3133                  *
3134          * Switching to the kernel context is often used as a synchronous
3135                  * step prior to idling, e.g. in suspend for flushing all
3136                  * current operations to memory before sleeping. These we
3137                  * want to complete as quickly as possible to avoid prolonged
3138                  * stalls, so allow the gpu to boost to maximum clocks.
3139                  */
3140                 if (flags & I915_WAIT_FOR_IDLE_BOOST)
3141                         gen6_rps_boost(rq);
3142
3143                 timeout = i915_request_wait(rq, flags, timeout);
3144                 i915_request_put(rq);
3145                 if (timeout < 0)
3146                         return timeout;
3147
3148                 /* restart after reacquiring the lock */
3149                 mutex_lock(&gt->mutex);
3150                 tl = list_entry(&gt->active_list, typeof(*tl), link);
3151         }
3152         mutex_unlock(&gt->mutex);
3153
3154         return timeout;
3155 }
3156
3157 int i915_gem_wait_for_idle(struct drm_i915_private *i915,
3158                            unsigned int flags, long timeout)
3159 {
3160         GEM_TRACE("flags=%x (%s), timeout=%ld%s\n",
3161                   flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked",
3162                   timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "");
3163
3164         /* If the device is asleep, we have no requests outstanding */
3165         if (!READ_ONCE(i915->gt.awake))
3166                 return 0;
3167
3168         timeout = wait_for_timelines(i915, flags, timeout);
3169         if (timeout < 0)
3170                 return timeout;
3171
3172         if (flags & I915_WAIT_LOCKED) {
3173                 int err;
3174
3175                 lockdep_assert_held(&i915->drm.struct_mutex);
3176
3177                 err = wait_for_engines(i915);
3178                 if (err)
3179                         return err;
3180
3181                 i915_retire_requests(i915);
3182         }
3183
3184         return 0;
3185 }
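/*
 * Illustrative in-kernel sketch (hypothetical caller, not code from this
 * file): quiescing the GPU with i915_gem_wait_for_idle(). With
 * I915_WAIT_LOCKED the caller must already hold struct_mutex so that the
 * completed requests can be retired before returning.
 *
 *	static int example_quiesce(struct drm_i915_private *i915)
 *	{
 *		lockdep_assert_held(&i915->drm.struct_mutex);
 *
 *		// Returns 0 on success, or a negative error (e.g. -EIO once
 *		// the GPU has been declared wedged).
 *		return i915_gem_wait_for_idle(i915,
 *					      I915_WAIT_INTERRUPTIBLE |
 *					      I915_WAIT_LOCKED,
 *					      I915_GEM_IDLE_TIMEOUT);
 *	}
 */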
3186
3187 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
3188 {
3189         /*
3190          * We manually flush the CPU domain so that we can override and
3191          * force the flush for the display, and perform it asynchronously.
3192          */
3193         flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
3194         if (obj->cache_dirty)
3195                 i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
3196         obj->write_domain = 0;
3197 }
3198
3199 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
3200 {
3201         if (!READ_ONCE(obj->pin_global))
3202                 return;
3203
3204         mutex_lock(&obj->base.dev->struct_mutex);
3205         __i915_gem_object_flush_for_display(obj);
3206         mutex_unlock(&obj->base.dev->struct_mutex);
3207 }
3208
3209 /**
3210  * Moves a single object to the WC read, and possibly write domain.
3211  * @obj: object to act on
3212  * @write: ask for write access or read only
3213  *
3214  * This function returns when the move is complete, including waiting on
3215  * flushes to occur.
3216  */
3217 int
3218 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
3219 {
3220         int ret;
3221
3222         lockdep_assert_held(&obj->base.dev->struct_mutex);
3223
3224         ret = i915_gem_object_wait(obj,
3225                                    I915_WAIT_INTERRUPTIBLE |
3226                                    I915_WAIT_LOCKED |
3227                                    (write ? I915_WAIT_ALL : 0),
3228                                    MAX_SCHEDULE_TIMEOUT);
3229         if (ret)
3230                 return ret;
3231
3232         if (obj->write_domain == I915_GEM_DOMAIN_WC)
3233                 return 0;
3234
3235         /* Flush and acquire obj->pages so that we are coherent through
3236          * direct access in memory with previous cached writes through
3237          * shmemfs and that our cache domain tracking remains valid.
3238          * For example, if the obj->filp was moved to swap without us
3239          * being notified and releasing the pages, we would mistakenly
3240          * continue to assume that the obj remained out of the CPU cached
3241          * domain.
3242          */
3243         ret = i915_gem_object_pin_pages(obj);
3244         if (ret)
3245                 return ret;
3246
3247         flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
3248
3249         /* Serialise direct access to this object with the barriers for
3250          * coherent writes from the GPU, by effectively invalidating the
3251          * WC domain upon first access.
3252          */
3253         if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
3254                 mb();
3255
3256         /* It should now be out of any other write domains, and we can update
3257          * the domain values for our changes.
3258          */
3259         GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
3260         obj->read_domains |= I915_GEM_DOMAIN_WC;
3261         if (write) {
3262                 obj->read_domains = I915_GEM_DOMAIN_WC;
3263                 obj->write_domain = I915_GEM_DOMAIN_WC;
3264                 obj->mm.dirty = true;
3265         }
3266
3267         i915_gem_object_unpin_pages(obj);
3268         return 0;
3269 }
3270
3271 /**
3272  * Moves a single object to the GTT read, and possibly write domain.
3273  * @obj: object to act on
3274  * @write: ask for write access or read only
3275  *
3276  * This function returns when the move is complete, including waiting on
3277  * flushes to occur.
3278  */
3279 int
3280 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
3281 {
3282         int ret;
3283
3284         lockdep_assert_held(&obj->base.dev->struct_mutex);
3285
3286         ret = i915_gem_object_wait(obj,
3287                                    I915_WAIT_INTERRUPTIBLE |
3288                                    I915_WAIT_LOCKED |
3289                                    (write ? I915_WAIT_ALL : 0),
3290                                    MAX_SCHEDULE_TIMEOUT);
3291         if (ret)
3292                 return ret;
3293
3294         if (obj->write_domain == I915_GEM_DOMAIN_GTT)
3295                 return 0;
3296
3297         /* Flush and acquire obj->pages so that we are coherent through
3298          * direct access in memory with previous cached writes through
3299          * shmemfs and that our cache domain tracking remains valid.
3300          * For example, if the obj->filp was moved to swap without us
3301          * being notified and releasing the pages, we would mistakenly
3302          * continue to assume that the obj remained out of the CPU cached
3303          * domain.
3304          */
3305         ret = i915_gem_object_pin_pages(obj);
3306         if (ret)
3307                 return ret;
3308
3309         flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
3310
3311         /* Serialise direct access to this object with the barriers for
3312          * coherent writes from the GPU, by effectively invalidating the
3313          * GTT domain upon first access.
3314          */
3315         if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
3316                 mb();
3317
3318         /* It should now be out of any other write domains, and we can update
3319          * the domain values for our changes.
3320          */
3321         GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
3322         obj->read_domains |= I915_GEM_DOMAIN_GTT;
3323         if (write) {
3324                 obj->read_domains = I915_GEM_DOMAIN_GTT;
3325                 obj->write_domain = I915_GEM_DOMAIN_GTT;
3326                 obj->mm.dirty = true;
3327         }
3328
3329         i915_gem_object_unpin_pages(obj);
3330         return 0;
3331 }
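/*
 * Illustrative in-kernel sketch (hypothetical caller, not code from this
 * file): the usual calling pattern for the domain movers; the WC variant
 * above and the CPU variant later in this file are used the same way.
 * struct_mutex must be held across the call, and write=true marks the
 * object dirty and invalidates the other read domains.
 *
 *	static int example_make_gtt_writable(struct drm_i915_gem_object *obj)
 *	{
 *		struct drm_i915_private *i915 = to_i915(obj->base.dev);
 *		int err;
 *
 *		mutex_lock(&i915->drm.struct_mutex);
 *		err = i915_gem_object_set_to_gtt_domain(obj, true);
 *		mutex_unlock(&i915->drm.struct_mutex);
 *
 *		return err;
 *	}
 */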
3332
3333 /**
3334  * Changes the cache-level of an object across all VMA.
3335  * @obj: object to act on
3336  * @cache_level: new cache level to set for the object
3337  *
3338  * After this function returns, the object will be in the new cache-level
3339  * across all GTT and the contents of the backing storage will be coherent,
3340  * with respect to the new cache-level. In order to keep the backing storage
3341  * coherent for all users, we only allow a single cache level to be set
3342  * globally on the object and prevent it from being changed whilst the
3343  * hardware is reading from the object. That is if the object is currently
3344  * hardware is reading from the object. That is, if the object is currently
3345  * cache coherency) and all non-MOCS GPU access will also be uncached so
3346  * that all direct access to the scanout remains coherent.
3347  */
3348 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
3349                                     enum i915_cache_level cache_level)
3350 {
3351         struct i915_vma *vma;
3352         int ret;
3353
3354         lockdep_assert_held(&obj->base.dev->struct_mutex);
3355
3356         if (obj->cache_level == cache_level)
3357                 return 0;
3358
3359         /* Inspect the list of currently bound VMA and unbind any that would
3360          * be invalid given the new cache-level. This is principally to
3361          * catch the issue of the CS prefetch crossing page boundaries and
3362          * reading an invalid PTE on older architectures.
3363          */
3364 restart:
3365         list_for_each_entry(vma, &obj->vma.list, obj_link) {
3366                 if (!drm_mm_node_allocated(&vma->node))
3367                         continue;
3368
3369                 if (i915_vma_is_pinned(vma)) {
3370                         DRM_DEBUG("can not change the cache level of pinned objects\n");
3371                         return -EBUSY;
3372                 }
3373
3374                 if (!i915_vma_is_closed(vma) &&
3375                     i915_gem_valid_gtt_space(vma, cache_level))
3376                         continue;
3377
3378                 ret = i915_vma_unbind(vma);
3379                 if (ret)
3380                         return ret;
3381
3382                 /* As unbinding may affect other elements in the
3383                  * obj->vma_list (due to side-effects from retiring
3384                  * an active vma), play safe and restart the iterator.
3385                  */
3386                 goto restart;
3387         }
3388
3389         /* We can reuse the existing drm_mm nodes but need to change the
3390          * cache-level on the PTE. We could simply unbind them all and
3391          * rebind with the correct cache-level on next use. However since
3392          * we already have a valid slot, dma mapping, pages etc, we may as
3393          * well rewrite the PTE in the belief that doing so tramples upon less
3394          * state and so involves less work.
3395          */
3396         if (obj->bind_count) {
3397                 /* Before we change the PTE, the GPU must not be accessing it.
3398                  * If we wait upon the object, we know that all the bound
3399                  * VMA are no longer active.
3400                  */
3401                 ret = i915_gem_object_wait(obj,
3402                                            I915_WAIT_INTERRUPTIBLE |
3403                                            I915_WAIT_LOCKED |
3404                                            I915_WAIT_ALL,
3405                                            MAX_SCHEDULE_TIMEOUT);
3406                 if (ret)
3407                         return ret;
3408
3409                 if (!HAS_LLC(to_i915(obj->base.dev)) &&
3410                     cache_level != I915_CACHE_NONE) {
3411                         /* Access to snoopable pages through the GTT is
3412                          * incoherent and on some machines causes a hard
3413          * lockup. Relinquish the CPU mmapping to force
3414                          * userspace to refault in the pages and we can
3415                          * then double check if the GTT mapping is still
3416                          * valid for that pointer access.
3417                          */
3418                         i915_gem_release_mmap(obj);
3419
3420                         /* As we no longer need a fence for GTT access,
3421                          * we can relinquish it now (and so prevent having
3422                          * to steal a fence from someone else on the next
3423                          * fence request). Note GPU activity would have
3424                          * dropped the fence as all snoopable access is
3425                          * supposed to be linear.
3426                          */
3427                         for_each_ggtt_vma(vma, obj) {
3428                                 ret = i915_vma_put_fence(vma);
3429                                 if (ret)
3430                                         return ret;
3431                         }
3432                 } else {
3433                         /* We either have incoherent backing store and
3434                          * so no GTT access or the architecture is fully
3435                          * coherent. In such cases, existing GTT mmaps
3436                          * ignore the cache bit in the PTE and we can
3437                          * rewrite it without confusing the GPU or having
3438                          * to force userspace to fault back in its mmaps.
3439                          */
3440                 }
3441
3442                 list_for_each_entry(vma, &obj->vma.list, obj_link) {
3443                         if (!drm_mm_node_allocated(&vma->node))
3444                                 continue;
3445
3446                         ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
3447                         if (ret)
3448                                 return ret;
3449                 }
3450         }
3451
3452         list_for_each_entry(vma, &obj->vma.list, obj_link)
3453                 vma->node.color = cache_level;
3454         i915_gem_object_set_cache_coherency(obj, cache_level);
3455         obj->cache_dirty = true; /* Always invalidate stale cachelines */
3456
3457         return 0;
3458 }
3459
3460 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
3461                                struct drm_file *file)
3462 {
3463         struct drm_i915_gem_caching *args = data;
3464         struct drm_i915_gem_object *obj;
3465         int err = 0;
3466
3467         rcu_read_lock();
3468         obj = i915_gem_object_lookup_rcu(file, args->handle);
3469         if (!obj) {
3470                 err = -ENOENT;
3471                 goto out;
3472         }
3473
3474         switch (obj->cache_level) {
3475         case I915_CACHE_LLC:
3476         case I915_CACHE_L3_LLC:
3477                 args->caching = I915_CACHING_CACHED;
3478                 break;
3479
3480         case I915_CACHE_WT:
3481                 args->caching = I915_CACHING_DISPLAY;
3482                 break;
3483
3484         default:
3485                 args->caching = I915_CACHING_NONE;
3486                 break;
3487         }
3488 out:
3489         rcu_read_unlock();
3490         return err;
3491 }
3492
3493 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
3494                                struct drm_file *file)
3495 {
3496         struct drm_i915_private *i915 = to_i915(dev);
3497         struct drm_i915_gem_caching *args = data;
3498         struct drm_i915_gem_object *obj;
3499         enum i915_cache_level level;
3500         int ret = 0;
3501
3502         switch (args->caching) {
3503         case I915_CACHING_NONE:
3504                 level = I915_CACHE_NONE;
3505                 break;
3506         case I915_CACHING_CACHED:
3507                 /*
3508                  * Due to a HW issue on BXT A stepping, GPU stores via a
3509                  * snooped mapping may leave stale data in a corresponding CPU
3510                  * cacheline, whereas normally such cachelines would get
3511                  * invalidated.
3512                  */
3513                 if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
3514                         return -ENODEV;
3515
3516                 level = I915_CACHE_LLC;
3517                 break;
3518         case I915_CACHING_DISPLAY:
3519                 level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
3520                 break;
3521         default:
3522                 return -EINVAL;
3523         }
3524
3525         obj = i915_gem_object_lookup(file, args->handle);
3526         if (!obj)
3527                 return -ENOENT;
3528
3529         /*
3530          * The caching mode of proxy object is handled by its generator, and
3531          * not allowed to be changed by userspace.
3532          */
3533         if (i915_gem_object_is_proxy(obj)) {
3534                 ret = -ENXIO;
3535                 goto out;
3536         }
3537
3538         if (obj->cache_level == level)
3539                 goto out;
3540
3541         ret = i915_gem_object_wait(obj,
3542                                    I915_WAIT_INTERRUPTIBLE,
3543                                    MAX_SCHEDULE_TIMEOUT);
3544         if (ret)
3545                 goto out;
3546
3547         ret = i915_mutex_lock_interruptible(dev);
3548         if (ret)
3549                 goto out;
3550
3551         ret = i915_gem_object_set_cache_level(obj, level);
3552         mutex_unlock(&dev->struct_mutex);
3553
3554 out:
3555         i915_gem_object_put(obj);
3556         return ret;
3557 }
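/*
 * Illustrative userspace sketch (not part of the driver): selecting a cache
 * mode for a buffer with DRM_IOCTL_I915_GEM_SET_CACHING. Assumes libdrm and
 * a valid GEM handle; the helper name and header paths are illustrative.
 *
 *	#include <errno.h>
 *	#include <stdint.h>
 *	#include <xf86drm.h>
 *	#include <drm/i915_drm.h>
 *
 *	static int set_bo_caching(int fd, uint32_t handle, uint32_t mode)
 *	{
 *		struct drm_i915_gem_caching arg = {
 *			.handle = handle,
 *			.caching = mode,	// e.g. I915_CACHING_CACHED
 *		};
 *
 *		if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg))
 *			return -errno;	// ENODEV if snooping is unsupported
 *
 *		return 0;
 *	}
 */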
3558
3559 /*
3560  * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
3561  * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
3562  * (for pageflips). We only flush the caches while preparing the buffer for
3563  * display, the callers are responsible for frontbuffer flush.
3564  */
3565 struct i915_vma *
3566 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
3567                                      u32 alignment,
3568                                      const struct i915_ggtt_view *view,
3569                                      unsigned int flags)
3570 {
3571         struct i915_vma *vma;
3572         int ret;
3573
3574         lockdep_assert_held(&obj->base.dev->struct_mutex);
3575
3576         /* Mark the global pin early so that we account for the
3577          * display coherency whilst setting up the cache domains.
3578          */
3579         obj->pin_global++;
3580
3581         /* The display engine is not coherent with the LLC cache on gen6.  As
3582          * a result, we make sure that the pinning that is about to occur is
3583          * done with uncached PTEs. This is lowest common denominator for all
3584          * chipsets.
3585          *
3586          * However for gen6+, we could do better by using the GFDT bit instead
3587          * of uncaching, which would allow us to flush all the LLC-cached data
3588          * with that bit in the PTE to main memory with just one PIPE_CONTROL.
3589          */
3590         ret = i915_gem_object_set_cache_level(obj,
3591                                               HAS_WT(to_i915(obj->base.dev)) ?
3592                                               I915_CACHE_WT : I915_CACHE_NONE);
3593         if (ret) {
3594                 vma = ERR_PTR(ret);
3595                 goto err_unpin_global;
3596         }
3597
3598         /* As the user may map the buffer once pinned in the display plane
3599          * (e.g. libkms for the bootup splash), we have to ensure that we
3600          * always use map_and_fenceable for all scanout buffers. However,
3601          * it may simply be too big to fit into the mappable aperture, in which case
3602          * put it anyway and hope that userspace can cope (but always first
3603          * try to preserve the existing ABI).
3604          */
3605         vma = ERR_PTR(-ENOSPC);
3606         if ((flags & PIN_MAPPABLE) == 0 &&
3607             (!view || view->type == I915_GGTT_VIEW_NORMAL))
3608                 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
3609                                                flags |
3610                                                PIN_MAPPABLE |
3611                                                PIN_NONBLOCK);
3612         if (IS_ERR(vma))
3613                 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
3614         if (IS_ERR(vma))
3615                 goto err_unpin_global;
3616
3617         vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
3618
3619         __i915_gem_object_flush_for_display(obj);
3620
3621         /* It should now be out of any other write domains, and we can update
3622          * the domain values for our changes.
3623          */
3624         obj->read_domains |= I915_GEM_DOMAIN_GTT;
3625
3626         return vma;
3627
3628 err_unpin_global:
3629         obj->pin_global--;
3630         return vma;
3631 }
3632
3633 void
3634 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
3635 {
3636         lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
3637
3638         if (WARN_ON(vma->obj->pin_global == 0))
3639                 return;
3640
3641         if (--vma->obj->pin_global == 0)
3642                 vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
3643
3644         /* Bump the LRU to try and avoid premature eviction whilst flipping  */
3645         i915_gem_object_bump_inactive_ggtt(vma->obj);
3646
3647         i915_vma_unpin(vma);
3648 }
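/*
 * Illustrative in-kernel sketch (hypothetical caller, not code from this
 * file): pairing the display pin/unpin helpers around a scanout. Real
 * callers live in the modeset code; the alignment, view and flags here are
 * placeholders.
 *
 *	static int example_pin_scanout(struct drm_i915_gem_object *obj,
 *				       u32 alignment)
 *	{
 *		struct i915_vma *vma;
 *
 *		lockdep_assert_held(&obj->base.dev->struct_mutex);
 *
 *		vma = i915_gem_object_pin_to_display_plane(obj, alignment,
 *							   NULL, 0);
 *		if (IS_ERR(vma))
 *			return PTR_ERR(vma);
 *
 *		// ... program the plane using i915_ggtt_offset(vma) ...
 *
 *		i915_gem_object_unpin_from_display_plane(vma);
 *		return 0;
 *	}
 */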
3649
3650 /**
3651  * Moves a single object to the CPU read, and possibly write domain.
3652  * @obj: object to act on
3653  * @write: requesting write or read-only access
3654  *
3655  * This function returns when the move is complete, including waiting on
3656  * flushes to occur.
3657  */
3658 int
3659 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
3660 {
3661         int ret;
3662
3663         lockdep_assert_held(&obj->base.dev->struct_mutex);
3664
3665         ret = i915_gem_object_wait(obj,
3666                                    I915_WAIT_INTERRUPTIBLE |
3667                                    I915_WAIT_LOCKED |
3668                                    (write ? I915_WAIT_ALL : 0),
3669                                    MAX_SCHEDULE_TIMEOUT);
3670         if (ret)
3671                 return ret;
3672
3673         flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
3674
3675         /* Flush the CPU cache if it's still invalid. */
3676         if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
3677                 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
3678                 obj->read_domains |= I915_GEM_DOMAIN_CPU;
3679         }
3680
3681         /* It should now be out of any other write domains, and we can update
3682          * the domain values for our changes.
3683          */
3684         GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);
3685
3686         /* If we're writing through the CPU, then the GPU read domains will
3687          * need to be invalidated at next use.
3688          */
3689         if (write)
3690                 __start_cpu_write(obj);
3691
3692         return 0;
3693 }
3694
3695 /* Throttle our rendering by waiting until the ring has completed our requests
3696  * emitted over 20 msec ago.
3697  *
3698  * Note that if we were to use the current jiffies each time around the loop,
3699  * we wouldn't escape the function with any frames outstanding if the time to
3700  * render a frame was over 20ms.
3701  *
3702  * This should get us reasonable parallelism between CPU and GPU but also
3703  * relatively low latency when blocking on a particular request to finish.
3704  */
3705 static int
3706 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
3707 {
3708         struct drm_i915_private *dev_priv = to_i915(dev);
3709         struct drm_i915_file_private *file_priv = file->driver_priv;
3710         unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
3711         struct i915_request *request, *target = NULL;
3712         long ret;
3713
3714         /* ABI: return -EIO if already wedged */
3715         ret = i915_terminally_wedged(dev_priv);
3716         if (ret)
3717                 return ret;
3718
3719         spin_lock(&file_priv->mm.lock);
3720         list_for_each_entry(request, &file_priv->mm.request_list, client_link) {
3721                 if (time_after_eq(request->emitted_jiffies, recent_enough))
3722                         break;
3723
3724                 if (target) {
3725                         list_del(&target->client_link);
3726                         target->file_priv = NULL;
3727                 }
3728
3729                 target = request;
3730         }
3731         if (target)
3732                 i915_request_get(target);
3733         spin_unlock(&file_priv->mm.lock);
3734
3735         if (target == NULL)
3736                 return 0;
3737
3738         ret = i915_request_wait(target,
3739                                 I915_WAIT_INTERRUPTIBLE,
3740                                 MAX_SCHEDULE_TIMEOUT);
3741         i915_request_put(target);
3742
3743         return ret < 0 ? ret : 0;
3744 }
3745
3746 struct i915_vma *
3747 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
3748                          const struct i915_ggtt_view *view,
3749                          u64 size,
3750                          u64 alignment,
3751                          u64 flags)
3752 {
3753         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
3754         struct i915_address_space *vm = &dev_priv->ggtt.vm;
3755         struct i915_vma *vma;
3756         int ret;
3757
3758         lockdep_assert_held(&obj->base.dev->struct_mutex);
3759
3760         if (flags & PIN_MAPPABLE &&
3761             (!view || view->type == I915_GGTT_VIEW_NORMAL)) {
3762                 /* If the required space is larger than the available
3763          * aperture, we will not be able to find a slot for the
3764                  * object and unbinding the object now will be in
3765                  * vain. Worse, doing so may cause us to ping-pong
3766                  * the object in and out of the Global GTT and
3767                  * waste a lot of cycles under the mutex.
3768                  */
3769                 if (obj->base.size > dev_priv->ggtt.mappable_end)
3770                         return ERR_PTR(-E2BIG);
3771
3772                 /* If NONBLOCK is set the caller is optimistically
3773                  * trying to cache the full object within the mappable
3774                  * aperture, and *must* have a fallback in place for
3775                  * situations where we cannot bind the object. We
3776                  * can be a little more lax here and use the fallback
3777                  * more often to avoid costly migrations of ourselves
3778                  * and other objects within the aperture.
3779                  *
3780                  * Half-the-aperture is used as a simple heuristic.
3781          * More interesting would be to search for a free
3782                  * block prior to making the commitment to unbind.
3783                  * That caters for the self-harm case, and with a
3784                  * little more heuristics (e.g. NOFAULT, NOEVICT)
3785                  * we could try to minimise harm to others.
3786                  */
3787                 if (flags & PIN_NONBLOCK &&
3788                     obj->base.size > dev_priv->ggtt.mappable_end / 2)
3789                         return ERR_PTR(-ENOSPC);
3790         }
3791
3792         vma = i915_vma_instance(obj, vm, view);
3793         if (IS_ERR(vma))
3794                 return vma;
3795
3796         if (i915_vma_misplaced(vma, size, alignment, flags)) {
3797                 if (flags & PIN_NONBLOCK) {
3798                         if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))
3799                                 return ERR_PTR(-ENOSPC);
3800
3801                         if (flags & PIN_MAPPABLE &&
3802                             vma->fence_size > dev_priv->ggtt.mappable_end / 2)
3803                                 return ERR_PTR(-ENOSPC);
3804                 }
3805
3806                 WARN(i915_vma_is_pinned(vma),
3807                      "bo is already pinned in ggtt with incorrect alignment:"
3808                      " offset=%08x, req.alignment=%llx,"
3809                      " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n",
3810                      i915_ggtt_offset(vma), alignment,
3811                      !!(flags & PIN_MAPPABLE),
3812                      i915_vma_is_map_and_fenceable(vma));
3813                 ret = i915_vma_unbind(vma);
3814                 if (ret)
3815                         return ERR_PTR(ret);
3816         }
3817
3818         ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
3819         if (ret)
3820                 return ERR_PTR(ret);
3821
3822         return vma;
3823 }
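/*
 * Illustrative in-kernel sketch (hypothetical caller, not code from this
 * file): pinning an object into the global GTT and releasing it again.
 * Passing 0 for size and alignment accepts the defaults; PIN_MAPPABLE asks
 * for a slot inside the CPU-visible aperture.
 *
 *	static int example_ggtt_pin(struct drm_i915_gem_object *obj)
 *	{
 *		struct i915_vma *vma;
 *
 *		lockdep_assert_held(&obj->base.dev->struct_mutex);
 *
 *		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
 *		if (IS_ERR(vma))
 *			return PTR_ERR(vma);
 *
 *		// ... use the binding, e.g. via i915_ggtt_offset(vma) ...
 *
 *		i915_vma_unpin(vma);
 *		return 0;
 *	}
 */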
3824
3825 static __always_inline unsigned int __busy_read_flag(unsigned int id)
3826 {
3827         if (id == I915_ENGINE_CLASS_INVALID)
3828                 return 0xffff0000;
3829
3830         GEM_BUG_ON(id >= 16);
3831         return 0x10000 << id;
3832 }
3833
3834 static __always_inline unsigned int __busy_write_id(unsigned int id)
3835 {
3836         /*
3837          * The uABI guarantees an active writer is also amongst the read
3838          * engines. This would be true if we accessed the activity tracking
3839          * under the lock, but as we perform the lookup of the object and
3840          * its activity locklessly we cannot guarantee that the last_write
3841          * being active implies that we have set the same engine flag from
3842          * last_read - hence we always set both read and write busy for
3843          * last_write.
3844          */
3845         if (id == I915_ENGINE_CLASS_INVALID)
3846                 return 0xffffffff;
3847
3848         return (id + 1) | __busy_read_flag(id);
3849 }
3850
3851 static __always_inline unsigned int
3852 __busy_set_if_active(const struct dma_fence *fence,
3853                      unsigned int (*flag)(unsigned int id))
3854 {
3855         const struct i915_request *rq;
3856
3857         /*
3858          * We have to check the current hw status of the fence as the uABI
3859          * guarantees forward progress. We could rely on the idle worker
3860          * to eventually flush us, but to minimise latency just ask the
3861          * hardware.
3862          *
3863          * Note we only report on the status of native fences.
3864          */
3865         if (!dma_fence_is_i915(fence))
3866                 return 0;
3867
3868         /* opencode to_request() in order to avoid const warnings */
3869         rq = container_of(fence, const struct i915_request, fence);
3870         if (i915_request_completed(rq))
3871                 return 0;
3872
3873         return flag(rq->engine->uabi_class);
3874 }
3875
3876 static __always_inline unsigned int
3877 busy_check_reader(const struct dma_fence *fence)
3878 {
3879         return __busy_set_if_active(fence, __busy_read_flag);
3880 }
3881
3882 static __always_inline unsigned int
3883 busy_check_writer(const struct dma_fence *fence)
3884 {
3885         if (!fence)
3886                 return 0;
3887
3888         return __busy_set_if_active(fence, __busy_write_id);
3889 }
3890
3891 int
3892 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
3893                     struct drm_file *file)
3894 {
3895         struct drm_i915_gem_busy *args = data;
3896         struct drm_i915_gem_object *obj;
3897         struct reservation_object_list *list;
3898         unsigned int seq;
3899         int err;
3900
3901         err = -ENOENT;
3902         rcu_read_lock();
3903         obj = i915_gem_object_lookup_rcu(file, args->handle);
3904         if (!obj)
3905                 goto out;
3906
3907         /*
3908          * A discrepancy here is that we do not report the status of
3909          * non-i915 fences, i.e. even though we may report the object as idle,
3910          * a call to set-domain may still stall waiting for foreign rendering.
3911          * This also means that wait-ioctl may report an object as busy,
3912          * where busy-ioctl considers it idle.
3913          *
3914          * We trade the ability to warn of foreign fences to report on which
3915          * i915 engines are active for the object.
3916          *
3917          * Alternatively, we can trade that extra information on read/write
3918          * activity with
3919          *      args->busy =
3920          *              !reservation_object_test_signaled_rcu(obj->resv, true);
3921          * to report the overall busyness. This is what the wait-ioctl does.
3922          *
3923          */
3924 retry:
3925         seq = raw_read_seqcount(&obj->resv->seq);
3926
3927         /* Translate the exclusive fence to the READ *and* WRITE engine */
3928         args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl));
3929
3930         /* Translate shared fences to READ set of engines */
3931         list = rcu_dereference(obj->resv->fence);
3932         if (list) {
3933                 unsigned int shared_count = list->shared_count, i;
3934
3935                 for (i = 0; i < shared_count; ++i) {
3936                         struct dma_fence *fence =
3937                                 rcu_dereference(list->shared[i]);
3938
3939                         args->busy |= busy_check_reader(fence);
3940                 }
3941         }
3942
3943         if (args->busy && read_seqcount_retry(&obj->resv->seq, seq))
3944                 goto retry;
3945
3946         err = 0;
3947 out:
3948         rcu_read_unlock();
3949         return err;
3950 }
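/*
 * Illustrative userspace sketch (not part of the driver): decoding the
 * bitmask returned by DRM_IOCTL_I915_GEM_BUSY using the encoding built by
 * __busy_read_flag()/__busy_write_id() above. Assumes libdrm and a valid
 * GEM handle; the helper name and header paths are illustrative.
 *
 *	#include <errno.h>
 *	#include <stdint.h>
 *	#include <xf86drm.h>
 *	#include <drm/i915_drm.h>
 *
 *	static int query_bo_busy(int fd, uint32_t handle,
 *				 unsigned int *writer, unsigned int *readers)
 *	{
 *		struct drm_i915_gem_busy busy = { .handle = handle };
 *
 *		if (drmIoctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy))
 *			return -errno;
 *
 *		// Low 16 bits: uabi class of the active writer, plus one
 *		// (0 means no writer). High 16 bits: one bit per uabi engine
 *		// class with an active reader.
 *		*writer = busy.busy & 0xffff;
 *		*readers = busy.busy >> 16;
 *		return 0;
 *	}
 */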
3951
3952 int
3953 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
3954                         struct drm_file *file_priv)
3955 {
3956         return i915_gem_ring_throttle(dev, file_priv);
3957 }
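/*
 * Illustrative userspace sketch (not part of the driver): the throttle ioctl
 * carries no payload, so the call is just the ioctl itself.
 *
 *	#include <errno.h>
 *	#include <xf86drm.h>
 *	#include <drm/i915_drm.h>
 *
 *	static int throttle(int fd)
 *	{
 *		if (drmIoctl(fd, DRM_IOCTL_I915_GEM_THROTTLE, NULL))
 *			return -errno;	// EIO if the GPU is wedged
 *
 *		return 0;
 *	}
 */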
3958
3959 int
3960 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
3961                        struct drm_file *file_priv)
3962 {
3963         struct drm_i915_private *dev_priv = to_i915(dev);
3964         struct drm_i915_gem_madvise *args = data;
3965         struct drm_i915_gem_object *obj;
3966         int err;
3967
3968         switch (args->madv) {
3969         case I915_MADV_DONTNEED:
3970         case I915_MADV_WILLNEED:
3971             break;
3972         default:
3973             return -EINVAL;
3974         }
3975
3976         obj = i915_gem_object_lookup(file_priv, args->handle);
3977         if (!obj)
3978                 return -ENOENT;
3979
3980         err = mutex_lock_interruptible(&obj->mm.lock);
3981         if (err)
3982                 goto out;
3983
3984         if (i915_gem_object_has_pages(obj) &&
3985             i915_gem_object_is_tiled(obj) &&
3986             dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
3987                 if (obj->mm.madv == I915_MADV_WILLNEED) {
3988                         GEM_BUG_ON(!obj->mm.quirked);
3989                         __i915_gem_object_unpin_pages(obj);
3990                         obj->mm.quirked = false;
3991                 }
3992                 if (args->madv == I915_MADV_WILLNEED) {
3993                         GEM_BUG_ON(obj->mm.quirked);
3994                         __i915_gem_object_pin_pages(obj);
3995                         obj->mm.quirked = true;
3996                 }
3997         }
3998
3999         if (obj->mm.madv != __I915_MADV_PURGED)
4000                 obj->mm.madv = args->madv;
4001
4002         /* if the object is no longer attached, discard its backing storage */
4003         if (obj->mm.madv == I915_MADV_DONTNEED &&
4004             !i915_gem_object_has_pages(obj))
4005                 i915_gem_object_truncate(obj);
4006
4007         args->retained = obj->mm.madv != __I915_MADV_PURGED;
4008         mutex_unlock(&obj->mm.lock);
4009
4010 out:
4011         i915_gem_object_put(obj);
4012         return err;
4013 }
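/*
 * Illustrative userspace sketch (not part of the driver): marking a cached
 * buffer purgeable and reclaiming it later. If retained comes back false on
 * the I915_MADV_WILLNEED call, the backing storage was purged in the
 * meantime and the contents must be regenerated. Assumes libdrm and a valid
 * GEM handle; the helper name and header paths are illustrative.
 *
 *	#include <errno.h>
 *	#include <stdbool.h>
 *	#include <stdint.h>
 *	#include <xf86drm.h>
 *	#include <drm/i915_drm.h>
 *
 *	static int bo_madvise(int fd, uint32_t handle, uint32_t madv,
 *			      bool *retained)
 *	{
 *		struct drm_i915_gem_madvise arg = {
 *			.handle = handle,
 *			.madv = madv,	// I915_MADV_DONTNEED or _WILLNEED
 *		};
 *
 *		if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &arg))
 *			return -errno;
 *
 *		*retained = arg.retained;
 *		return 0;
 *	}
 */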
4014
4015 static void
4016 frontbuffer_retire(struct i915_active_request *active,
4017                    struct i915_request *request)
4018 {
4019         struct drm_i915_gem_object *obj =
4020                 container_of(active, typeof(*obj), frontbuffer_write);
4021
4022         intel_fb_obj_flush(obj, ORIGIN_CS);
4023 }
4024
4025 void i915_gem_object_init(struct drm_i915_gem_object *obj,
4026                           const struct drm_i915_gem_object_ops *ops)
4027 {
4028         mutex_init(&obj->mm.lock);
4029
4030         spin_lock_init(&obj->vma.lock);
4031         INIT_LIST_HEAD(&obj->vma.list);
4032
4033         INIT_LIST_HEAD(&obj->lut_list);
4034         INIT_LIST_HEAD(&obj->batch_pool_link);
4035
4036         init_rcu_head(&obj->rcu);
4037
4038         obj->ops = ops;
4039
4040         reservation_object_init(&obj->__builtin_resv);
4041         obj->resv = &obj->__builtin_resv;
4042
4043         obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
4044         i915_active_request_init(&obj->frontbuffer_write,
4045                                  NULL, frontbuffer_retire);
4046
4047         obj->mm.madv = I915_MADV_WILLNEED;
4048         INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
4049         mutex_init(&obj->mm.get_page.lock);
4050
4051         i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
4052 }
4053
4054 static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
4055         .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
4056                  I915_GEM_OBJECT_IS_SHRINKABLE,
4057
4058         .get_pages = i915_gem_object_get_pages_gtt,
4059         .put_pages = i915_gem_object_put_pages_gtt,
4060
4061         .pwrite = i915_gem_object_pwrite_gtt,
4062 };
4063
4064 static int i915_gem_object_create_shmem(struct drm_device *dev,
4065                                         struct drm_gem_object *obj,
4066                                         size_t size)
4067 {
4068         struct drm_i915_private *i915 = to_i915(dev);
4069         unsigned long flags = VM_NORESERVE;
4070         struct file *filp;
4071
4072         drm_gem_private_object_init(dev, obj, size);
4073
4074         if (i915->mm.gemfs)
4075                 filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size,
4076                                                  flags);
4077         else
4078                 filp = shmem_file_setup("i915", size, flags);
4079
4080         if (IS_ERR(filp))
4081                 return PTR_ERR(filp);
4082
4083         obj->filp = filp;
4084
4085         return 0;
4086 }
4087
4088 struct drm_i915_gem_object *
4089 i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
4090 {
4091         struct drm_i915_gem_object *obj;
4092         struct address_space *mapping;
4093         unsigned int cache_level;
4094         gfp_t mask;
4095         int ret;
4096
4097         /* There is a prevalence of the assumption that we fit the object's
4098          * page count inside a 32bit _signed_ variable. Let's document this and
4099          * catch if we ever need to fix it. In the meantime, if you do spot
4100          * such a local variable, please consider fixing!
4101          */
4102         if (size >> PAGE_SHIFT > INT_MAX)
4103                 return ERR_PTR(-E2BIG);
4104
4105         if (overflows_type(size, obj->base.size))
4106                 return ERR_PTR(-E2BIG);
4107
4108         obj = i915_gem_object_alloc();
4109         if (obj == NULL)
4110                 return ERR_PTR(-ENOMEM);
4111
4112         ret = i915_gem_object_create_shmem(&dev_priv->drm, &obj->base, size);
4113         if (ret)
4114                 goto fail;
4115
4116         mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
4117         if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) {
4118                 /* 965gm cannot relocate objects above 4GiB. */
4119                 mask &= ~__GFP_HIGHMEM;
4120                 mask |= __GFP_DMA32;
4121         }
4122
4123         mapping = obj->base.filp->f_mapping;
4124         mapping_set_gfp_mask(mapping, mask);
4125         GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
4126
4127         i915_gem_object_init(obj, &i915_gem_object_ops);
4128
4129         obj->write_domain = I915_GEM_DOMAIN_CPU;
4130         obj->read_domains = I915_GEM_DOMAIN_CPU;
4131
4132         if (HAS_LLC(dev_priv))
4133                 /* On some devices, we can have the GPU use the LLC (the CPU
4134                  * cache) for about a 10% performance improvement
4135                  * compared to uncached.  Graphics requests other than
4136                  * display scanout are coherent with the CPU in
4137                  * accessing this cache.  This means in this mode we
4138                  * don't need to clflush on the CPU side, and on the
4139                  * GPU side we only need to flush internal caches to
4140                  * get data visible to the CPU.
4141                  *
4142                  * However, we maintain the display planes as UC, and so
4143                  * need to rebind when first used as such.
4144                  */
4145                 cache_level = I915_CACHE_LLC;
4146         else
4147                 cache_level = I915_CACHE_NONE;
4148
4149         i915_gem_object_set_cache_coherency(obj, cache_level);
4150
4151         trace_i915_gem_object_create(obj);
4152
4153         return obj;
4154
4155 fail:
4156         i915_gem_object_free(obj);
4157         return ERR_PTR(ret);
4158 }
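/*
 * Illustrative in-kernel sketch (hypothetical caller, not code from this
 * file): allocating a shmem-backed object and dropping the reference again.
 * Real users typically go on to pin the pages or bind the object into a VM
 * before touching it.
 *
 *	static int example_create(struct drm_i915_private *i915)
 *	{
 *		struct drm_i915_gem_object *obj;
 *
 *		obj = i915_gem_object_create(i915, 2 * PAGE_SIZE);
 *		if (IS_ERR(obj))
 *			return PTR_ERR(obj);
 *
 *		// ... pin pages, bind a vma, fill the contents, etc ...
 *
 *		i915_gem_object_put(obj);
 *		return 0;
 *	}
 */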
4159
4160 static bool discard_backing_storage(struct drm_i915_gem_object *obj)
4161 {
4162         /* If we are the last user of the backing storage (be it shmemfs
4163          * pages or stolen etc), we know that the pages are going to be
4164          * immediately released. In this case, we can then skip copying
4165          * back the contents from the GPU.
4166          */
4167
4168         if (obj->mm.madv != I915_MADV_WILLNEED)
4169                 return false;
4170
4171         if (obj->base.filp == NULL)
4172                 return true;
4173
4174         /* At first glance, this looks racy, but then again so would be
4175          * userspace racing mmap against close. However, the first external
4176          * reference to the filp can only be obtained through the
4177          * i915_gem_mmap_ioctl() which safeguards us against the user
4178          * acquiring such a reference whilst we are in the middle of
4179          * freeing the object.
4180          */
4181         return atomic_long_read(&obj->base.filp->f_count) == 1;
4182 }
4183
4184 static void __i915_gem_free_objects(struct drm_i915_private *i915,
4185                                     struct llist_node *freed)
4186 {
4187         struct drm_i915_gem_object *obj, *on;
4188         intel_wakeref_t wakeref;
4189
4190         wakeref = intel_runtime_pm_get(i915);
4191         llist_for_each_entry_safe(obj, on, freed, freed) {
4192                 struct i915_vma *vma, *vn;
4193
4194                 trace_i915_gem_object_destroy(obj);
4195
4196                 mutex_lock(&i915->drm.struct_mutex);
4197
4198                 GEM_BUG_ON(i915_gem_object_is_active(obj));
4199                 list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {
4200                         GEM_BUG_ON(i915_vma_is_active(vma));
4201                         vma->flags &= ~I915_VMA_PIN_MASK;
4202                         i915_vma_destroy(vma);
4203                 }
4204                 GEM_BUG_ON(!list_empty(&obj->vma.list));
4205                 GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree));
4206
4207                 /* This serializes freeing with the shrinker. Since the free
4208                  * is delayed, first by RCU then by the workqueue, we want the
4209                  * shrinker to be able to free pages of unreferenced objects,
4210                  * or else we may oom whilst there are plenty of deferred
4211                  * freed objects.
4212                  */
4213                 if (i915_gem_object_has_pages(obj)) {
4214                         spin_lock(&i915->mm.obj_lock);
4215                         list_del_init(&obj->mm.link);
4216                         spin_unlock(&i915->mm.obj_lock);
4217                 }
4218
4219                 mutex_unlock(&i915->drm.struct_mutex);
4220
4221                 GEM_BUG_ON(obj->bind_count);
4222                 GEM_BUG_ON(obj->userfault_count);
4223                 GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
4224                 GEM_BUG_ON(!list_empty(&obj->lut_list));
4225
4226                 if (obj->ops->release)
4227                         obj->ops->release(obj);
4228
4229                 if (WARN_ON(i915_gem_object_has_pinned_pages(obj)))
4230                         atomic_set(&obj->mm.pages_pin_count, 0);
4231                 __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
4232                 GEM_BUG_ON(i915_gem_object_has_pages(obj));
4233
4234                 if (obj->base.import_attach)
4235                         drm_prime_gem_destroy(&obj->base, NULL);
4236
4237                 reservation_object_fini(&obj->__builtin_resv);
4238                 drm_gem_object_release(&obj->base);
4239                 i915_gem_info_remove_obj(i915, obj->base.size);
4240
4241                 bitmap_free(obj->bit_17);
4242                 i915_gem_object_free(obj);
4243
4244                 GEM_BUG_ON(!atomic_read(&i915->mm.free_count));
4245                 atomic_dec(&i915->mm.free_count);
4246
4247                 if (on)
4248                         cond_resched();
4249         }
4250         intel_runtime_pm_put(i915, wakeref);
4251 }
4252
4253 static void i915_gem_flush_free_objects(struct drm_i915_private *i915)
4254 {
4255         struct llist_node *freed;
4256
4257         /* Free the oldest, most stale object to keep the free_list short */
4258         freed = NULL;
4259         if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */
4260                 /* Only one consumer of llist_del_first() allowed */
4261                 spin_lock(&i915->mm.free_lock);
4262                 freed = llist_del_first(&i915->mm.free_list);
4263                 spin_unlock(&i915->mm.free_lock);
4264         }
4265         if (unlikely(freed)) {
4266                 freed->next = NULL;
4267                 __i915_gem_free_objects(i915, freed);
4268         }
4269 }
4270
4271 static void __i915_gem_free_work(struct work_struct *work)
4272 {
4273         struct drm_i915_private *i915 =
4274                 container_of(work, struct drm_i915_private, mm.free_work);
4275         struct llist_node *freed;
4276
4277         /*
4278          * All file-owned VMA should have been released by this point through
4279          * i915_gem_close_object(), or earlier by i915_gem_context_close().
4280          * However, the object may also be bound into the global GTT (e.g.
4281          * older GPUs without per-process support, or for direct access through
4282          * the GTT either for the user or for scanout). Those VMA still need to
4283          * be unbound now.
4284          */
4285
4286         spin_lock(&i915->mm.free_lock);
4287         while ((freed = llist_del_all(&i915->mm.free_list))) {
4288                 spin_unlock(&i915->mm.free_lock);
4289
4290                 __i915_gem_free_objects(i915, freed);
4291                 if (need_resched())
4292                         return;
4293
4294                 spin_lock(&i915->mm.free_lock);
4295         }
4296         spin_unlock(&i915->mm.free_lock);
4297 }
4298
4299 static void __i915_gem_free_object_rcu(struct rcu_head *head)
4300 {
4301         struct drm_i915_gem_object *obj =
4302                 container_of(head, typeof(*obj), rcu);
4303         struct drm_i915_private *i915 = to_i915(obj->base.dev);
4304
4305         /*
4306          * We reuse obj->rcu for the freed list, so we had better not treat
4307          * it like a rcu_head from this point forwards. And we expect all
4308          * objects to be freed via this path.
4309          */
4310         destroy_rcu_head(&obj->rcu);
4311
4312         /*
4313          * Since we require blocking on struct_mutex to unbind the freed
4314          * object from the GPU before releasing resources back to the
4315          * system, we cannot do that directly from the RCU callback (which may
4316          * be a softirq context), but must instead then defer that work onto a
4317          * kthread. We use the RCU callback rather than move the freed object
4318          * directly onto the work queue so that we can mix between using the
4319          * worker and performing frees directly from subsequent allocations for
4320          * crude but effective memory throttling.
4321          */
4322         if (llist_add(&obj->freed, &i915->mm.free_list))
4323                 queue_work(i915->wq, &i915->mm.free_work);
4324 }
4325
4326 void i915_gem_free_object(struct drm_gem_object *gem_obj)
4327 {
4328         struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
4329
4330         if (obj->mm.quirked)
4331                 __i915_gem_object_unpin_pages(obj);
4332
4333         if (discard_backing_storage(obj))
4334                 obj->mm.madv = I915_MADV_DONTNEED;
4335
4336         /*
4337          * Before we free the object, make sure any pure RCU-only
4338          * read-side critical sections are complete, e.g.
4339          * i915_gem_busy_ioctl(). For the corresponding synchronized
4340          * lookup see i915_gem_object_lookup_rcu().
4341          */
4342         atomic_inc(&to_i915(obj->base.dev)->mm.free_count);
4343         call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
4344 }
4345
4346 void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
4347 {
4348         lockdep_assert_held(&obj->base.dev->struct_mutex);
4349
4350         if (!i915_gem_object_has_active_reference(obj) &&
4351             i915_gem_object_is_active(obj))
4352                 i915_gem_object_set_active_reference(obj);
4353         else
4354                 i915_gem_object_put(obj);
4355 }
4356
4357 void i915_gem_sanitize(struct drm_i915_private *i915)
4358 {
4359         intel_wakeref_t wakeref;
4360
4361         GEM_TRACE("\n");
4362
4363         wakeref = intel_runtime_pm_get(i915);
4364         intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
4365
4366         /*
4367          * As we have just resumed the machine and woken the device up from
4368          * deep PCI sleep (presumably D3_cold), assume the HW has been reset
4369          * back to defaults, recovering from whatever wedged state we left it
4370          * in and so worth trying to use the device once more.
4371          */
4372         if (i915_terminally_wedged(i915))
4373                 i915_gem_unset_wedged(i915);
4374
4375         /*
4376          * If we inherit context state from the BIOS or earlier occupants
4377          * of the GPU, the GPU may be in an inconsistent state when we
4378          * try to take over. The only way to remove the earlier state
4379          * is by resetting. However, resetting on earlier gen is tricky as
4380          * it may impact the display and we are uncertain about the stability
4381          * of the reset, so this could be applied to even earlier gen.
4382          */
4383         intel_engines_sanitize(i915, false);
4384
4385         intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
4386         intel_runtime_pm_put(i915, wakeref);
4387
4388         mutex_lock(&i915->drm.struct_mutex);
4389         i915_gem_contexts_lost(i915);
4390         mutex_unlock(&i915->drm.struct_mutex);
4391 }
4392
4393 void i915_gem_suspend(struct drm_i915_private *i915)
4394 {
4395         intel_wakeref_t wakeref;
4396
4397         GEM_TRACE("\n");
4398
4399         wakeref = intel_runtime_pm_get(i915);
4400
4401         flush_workqueue(i915->wq);
4402
4403         mutex_lock(&i915->drm.struct_mutex);
4404
4405         /*
4406          * We have to flush all the executing contexts to main memory so
4407          * that they can be saved in the hibernation image. To ensure the last
4408          * context image is coherent, we have to switch away from it. That
4409          * leaves the i915->kernel_context still active when
4410          * we actually suspend, and its image in memory may not match the GPU
4411          * state. Fortunately, the kernel_context is disposable and we do
4412          * not rely on its state.
4413          */
4414         switch_to_kernel_context_sync(i915, i915->gt.active_engines);
4415
4416         mutex_unlock(&i915->drm.struct_mutex);
4417         i915_reset_flush(i915);
4418
4419         drain_delayed_work(&i915->gt.retire_work);
4420
4421         /*
4422          * As the idle_work rearms itself if it detects a race, play safe and
4423          * repeat the flush until it is definitely idle.
4424          */
4425         drain_delayed_work(&i915->gt.idle_work);
4426
4427         /*
4428          * Assert that we successfully flushed all the work and
4429          * reset the GPU back to its idle, low power state.
4430          */
4431         GEM_BUG_ON(i915->gt.awake);
4432
4433         intel_uc_suspend(i915);
4434
4435         intel_runtime_pm_put(i915, wakeref);
4436 }
4437
4438 void i915_gem_suspend_late(struct drm_i915_private *i915)
4439 {
4440         struct drm_i915_gem_object *obj;
4441         struct list_head *phases[] = {
4442                 &i915->mm.unbound_list,
4443                 &i915->mm.bound_list,
4444                 NULL
4445         }, **phase;
4446
4447         /*
4448          * Neither the BIOS, ourselves nor any other kernel
4449          * expects the system to be in execlists mode on startup,
4450          * so we need to reset the GPU back to legacy mode. And the only
4451          * known way to disable logical contexts is through a GPU reset.
4452          *
4453          * So in order to leave the system in a known default configuration,
4454          * always reset the GPU upon unload and suspend. Afterwards we then
4455          * clean up the GEM state tracking, flushing off the requests and
4456          * leaving the system in a known idle state.
4457          *
4458          * Note that it is of the utmost importance that the GPU is idle and
4459          * all stray writes are flushed *before* we dismantle the backing
4460          * storage for the pinned objects.
4461          *
4462          * However, since we are uncertain that resetting the GPU on older
4463          * machines is a good idea, we don't - just in case it leaves the
4464          * machine in an unusable condition.
4465          */
4466
4467         mutex_lock(&i915->drm.struct_mutex);
4468         for (phase = phases; *phase; phase++) {
4469                 list_for_each_entry(obj, *phase, mm.link)
4470                         WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
4471         }
4472         mutex_unlock(&i915->drm.struct_mutex);
4473
4474         intel_uc_sanitize(i915);
4475         i915_gem_sanitize(i915);
4476 }
4477
4478 void i915_gem_resume(struct drm_i915_private *i915)
4479 {
4480         GEM_TRACE("\n");
4481
4482         WARN_ON(i915->gt.awake);
4483
4484         mutex_lock(&i915->drm.struct_mutex);
4485         intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
4486
4487         i915_gem_restore_gtt_mappings(i915);
4488         i915_gem_restore_fences(i915);
4489
4490         /*
4491          * As we didn't flush the kernel context before suspend, we cannot
4492          * guarantee that the context image is complete. So let's just reset
4493          * it and start again.
4494          */
4495         i915->gt.resume(i915);
4496
4497         if (i915_gem_init_hw(i915))
4498                 goto err_wedged;
4499
4500         intel_uc_resume(i915);
4501
4502         /* Always reload a context for powersaving. */
4503         if (!load_power_context(i915))
4504                 goto err_wedged;
4505
4506 out_unlock:
4507         intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
4508         mutex_unlock(&i915->drm.struct_mutex);
4509         return;
4510
4511 err_wedged:
4512         if (!i915_reset_failed(i915)) {
4513                 dev_err(i915->drm.dev,
4514                         "Failed to re-initialize GPU, declaring it wedged!\n");
4515                 i915_gem_set_wedged(i915);
4516         }
4517         goto out_unlock;
4518 }
4519
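     /*
      * (Re)enable hardware swizzling of tiled surfaces during GT
      * initialisation. This is a no-op on gen4 and earlier or when bit-6
      * swizzling is not in use; otherwise it programs the display arbiter
      * and, from gen6 onwards, TILECTL plus the relevant
      * ARB_MODE/GAMTARBMODE swizzle bits.
      */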
4520 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv)
4521 {
4522         if (INTEL_GEN(dev_priv) < 5 ||
4523             dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
4524                 return;
4525
4526         I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
4527                                  DISP_TILE_SURFACE_SWIZZLING);
4528
4529         if (IS_GEN(dev_priv, 5))
4530                 return;
4531
4532         I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
4533         if (IS_GEN(dev_priv, 6))
4534                 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
4535         else if (IS_GEN(dev_priv, 7))
4536                 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
4537         else if (IS_GEN(dev_priv, 8))
4538                 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
4539         else
4540                 BUG();
4541 }
4542
4543 static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base)
4544 {
4545         I915_WRITE(RING_CTL(base), 0);
4546         I915_WRITE(RING_HEAD(base), 0);
4547         I915_WRITE(RING_TAIL(base), 0);
4548         I915_WRITE(RING_START(base), 0);
4549 }
4550
4551 static void init_unused_rings(struct drm_i915_private *dev_priv)
4552 {
4553         if (IS_I830(dev_priv)) {
4554                 init_unused_ring(dev_priv, PRB1_BASE);
4555                 init_unused_ring(dev_priv, SRB0_BASE);
4556                 init_unused_ring(dev_priv, SRB1_BASE);
4557                 init_unused_ring(dev_priv, SRB2_BASE);
4558                 init_unused_ring(dev_priv, SRB3_BASE);
4559         } else if (IS_GEN(dev_priv, 2)) {
4560                 init_unused_ring(dev_priv, SRB0_BASE);
4561                 init_unused_ring(dev_priv, SRB1_BASE);
4562         } else if (IS_GEN(dev_priv, 3)) {
4563                 init_unused_ring(dev_priv, PRB1_BASE);
4564                 init_unused_ring(dev_priv, PRB2_BASE);
4565         }
4566 }
4567
4568 static int __i915_gem_restart_engines(void *data)
4569 {
4570         struct drm_i915_private *i915 = data;
4571         struct intel_engine_cs *engine;
4572         enum intel_engine_id id;
4573         int err;
4574
4575         for_each_engine(engine, i915, id) {
4576                 err = engine->init_hw(engine);
4577                 if (err) {
4578                         DRM_ERROR("Failed to restart %s (%d)\n",
4579                                   engine->name, err);
4580                         return err;
4581                 }
4582         }
4583
4584         intel_engines_set_scheduler_caps(i915);
4585
4586         return 0;
4587 }
4588
4589 int i915_gem_init_hw(struct drm_i915_private *dev_priv)
4590 {
4591         int ret;
4592
4593         dev_priv->gt.last_init_time = ktime_get();
4594
4595         /* Double layer security blanket, see i915_gem_init() */
4596         intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
4597
4598         if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9)
4599                 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
4600
4601         if (IS_HASWELL(dev_priv))
4602                 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ?
4603                            LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
4604
4605         /* Apply the GT workarounds... */
4606         intel_gt_apply_workarounds(dev_priv);
4607         /* ...and determine whether they are sticking. */
4608         intel_gt_verify_workarounds(dev_priv, "init");
4609
4610         i915_gem_init_swizzling(dev_priv);
4611
4612         /*
4613          * At least 830 can leave some of the unused rings
4614          * "active" (i.e. head != tail) after resume, which
4615          * will prevent C3 entry. Make sure all unused rings
4616          * are totally idle.
4617          */
4618         init_unused_rings(dev_priv);
4619
4620         BUG_ON(!dev_priv->kernel_context);
4621         ret = i915_terminally_wedged(dev_priv);
4622         if (ret)
4623                 goto out;
4624
4625         ret = i915_ppgtt_init_hw(dev_priv);
4626         if (ret) {
4627                 DRM_ERROR("Enabling PPGTT failed (%d)\n", ret);
4628                 goto out;
4629         }
4630
4631         ret = intel_wopcm_init_hw(&dev_priv->wopcm);
4632         if (ret) {
4633                 DRM_ERROR("Enabling WOPCM failed (%d)\n", ret);
4634                 goto out;
4635         }
4636
4637         /* We can't enable contexts until all firmware is loaded */
4638         ret = intel_uc_init_hw(dev_priv);
4639         if (ret) {
4640                 DRM_ERROR("Enabling uc failed (%d)\n", ret);
4641                 goto out;
4642         }
4643
4644         intel_mocs_init_l3cc_table(dev_priv);
4645
4646         /* Only when the HW is re-initialised can we replay the requests */
4647         ret = __i915_gem_restart_engines(dev_priv);
4648         if (ret)
4649                 goto cleanup_uc;
4650
4651         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
4652
4653         return 0;
4654
4655 cleanup_uc:
4656         intel_uc_fini_hw(dev_priv);
4657 out:
4658         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
4659
4660         return ret;
4661 }
4662
4663 static int __intel_engines_record_defaults(struct drm_i915_private *i915)
4664 {
4665         struct i915_gem_context *ctx;
4666         struct intel_engine_cs *engine;
4667         enum intel_engine_id id;
4668         int err = 0;
4669
4670         /*
4671          * As we reset the GPU during very early sanitisation, the current
4672          * register state on the GPU should reflect its default values.
4673          * We load a context onto the hw (with restore-inhibit), then switch
4674          * over to a second context to save that default register state. We
4675          * can then prime every new context with that state so they all start
4676          * from the same default HW values.
4677          */
4678
4679         ctx = i915_gem_context_create_kernel(i915, 0);
4680         if (IS_ERR(ctx))
4681                 return PTR_ERR(ctx);
4682
4683         for_each_engine(engine, i915, id) {
4684                 struct i915_request *rq;
4685
4686                 rq = i915_request_alloc(engine, ctx);
4687                 if (IS_ERR(rq)) {
4688                         err = PTR_ERR(rq);
4689                         goto out_ctx;
4690                 }
4691
4692                 err = 0;
4693                 if (engine->init_context)
4694                         err = engine->init_context(rq);
4695
4696                 i915_request_add(rq);
4697                 if (err)
4698                         goto err_active;
4699         }
4700
4701         /* Flush the default context image to memory, and enable powersaving. */
4702         if (!load_power_context(i915)) {
4703                 err = -EIO;
4704                 goto err_active;
4705         }
4706
4707         for_each_engine(engine, i915, id) {
4708                 struct intel_context *ce;
4709                 struct i915_vma *state;
4710                 void *vaddr;
4711
4712                 ce = intel_context_lookup(ctx, engine);
4713                 if (!ce)
4714                         continue;
4715
4716                 state = ce->state;
4717                 if (!state)
4718                         continue;
4719
4720                 GEM_BUG_ON(intel_context_is_pinned(ce));
4721
4722                 /*
4723                  * As we will hold a reference to the logical state, it will
4724                  * not be torn down with the context, and importantly the
4725                  * object will hold onto its vma (making it possible for a
4726                  * stray GTT write to corrupt our defaults). Unmap the vma
4727                  * from the GTT to prevent such accidents and reclaim the
4728                  * space.
4729                  */
4730                 err = i915_vma_unbind(state);
4731                 if (err)
4732                         goto err_active;
4733
4734                 err = i915_gem_object_set_to_cpu_domain(state->obj, false);
4735                 if (err)
4736                         goto err_active;
4737
4738                 engine->default_state = i915_gem_object_get(state->obj);
4739                 i915_gem_object_set_cache_coherency(engine->default_state,
4740                                                     I915_CACHE_LLC);
4741
4742                 /* Check we can acquire the image of the context state */
4743                 vaddr = i915_gem_object_pin_map(engine->default_state,
4744                                                 I915_MAP_FORCE_WB);
4745                 if (IS_ERR(vaddr)) {
4746                         err = PTR_ERR(vaddr);
4747                         goto err_active;
4748                 }
4749
4750                 i915_gem_object_unpin_map(engine->default_state);
4751         }
4752
4753         if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
4754                 unsigned int found = intel_engines_has_context_isolation(i915);
4755
4756                 /*
4757                  * Make sure that classes with multiple engine instances all
4758                  * share the same basic configuration.
4759                  */
4760                 for_each_engine(engine, i915, id) {
4761                         unsigned int bit = BIT(engine->uabi_class);
4762                         unsigned int expected = engine->default_state ? bit : 0;
4763
4764                         if ((found & bit) != expected) {
4765                                 DRM_ERROR("mismatching default context state for class %d on engine %s\n",
4766                                           engine->uabi_class, engine->name);
4767                         }
4768                 }
4769         }
4770
4771 out_ctx:
4772         i915_gem_context_set_closed(ctx);
4773         i915_gem_context_put(ctx);
4774         return err;
4775
4776 err_active:
4777         /*
4778          * If we have to abandon now, we expect the engines to be idle
4779          * and ready to be torn-down. The quickest way we can accomplish
4780          * this is by declaring ourselves wedged.
4781          */
4782         i915_gem_set_wedged(i915);
4783         goto out_ctx;
4784 }
4785
4786 static int
4787 i915_gem_init_scratch(struct drm_i915_private *i915, unsigned int size)
4788 {
4789         struct drm_i915_gem_object *obj;
4790         struct i915_vma *vma;
4791         int ret;
4792
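             /*
              * Prefer carving the scratch page out of stolen memory; stolen
              * allocation failure is reported as NULL, in which case we fall
              * back to an internal object, which reports errors via ERR_PTR().
              */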
4793         obj = i915_gem_object_create_stolen(i915, size);
4794         if (!obj)
4795                 obj = i915_gem_object_create_internal(i915, size);
4796         if (IS_ERR(obj)) {
4797                 DRM_ERROR("Failed to allocate scratch page\n");
4798                 return PTR_ERR(obj);
4799         }
4800
4801         vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
4802         if (IS_ERR(vma)) {
4803                 ret = PTR_ERR(vma);
4804                 goto err_unref;
4805         }
4806
4807         ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
4808         if (ret)
4809                 goto err_unref;
4810
4811         i915->gt.scratch = vma;
4812         return 0;
4813
4814 err_unref:
4815         i915_gem_object_put(obj);
4816         return ret;
4817 }
4818
4819 static void i915_gem_fini_scratch(struct drm_i915_private *i915)
4820 {
4821         i915_vma_unpin_and_release(&i915->gt.scratch, 0);
4822 }
4823
4824 int i915_gem_init(struct drm_i915_private *dev_priv)
4825 {
4826         int ret;
4827
4828         /* Fall back to 4K pages if the host doesn't support huge GTT pages. */
4829         if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv))
4830                 mkwrite_device_info(dev_priv)->page_sizes =
4831                         I915_GTT_PAGE_SIZE_4K;
4832
4833         dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1);
4834
4835         if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
4836                 dev_priv->gt.resume = intel_lr_context_resume;
4837                 dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
4838         } else {
4839                 dev_priv->gt.resume = intel_legacy_submission_resume;
4840                 dev_priv->gt.cleanup_engine = intel_engine_cleanup;
4841         }
4842
4843         i915_timelines_init(dev_priv);
4844
4845         ret = i915_gem_init_userptr(dev_priv);
4846         if (ret)
4847                 return ret;
4848
4849         ret = intel_uc_init_misc(dev_priv);
4850         if (ret)
4851                 return ret;
4852
4853         ret = intel_wopcm_init(&dev_priv->wopcm);
4854         if (ret)
4855                 goto err_uc_misc;
4856
4857         /* This is just a security blanket to placate dragons.
4858          * On some systems, we very sporadically observe that the first TLBs
4859          * used by the CS may be stale, despite us poking the TLB reset. If
4860          * we hold the forcewake during initialisation these problems
4861          * just magically go away.
4862          */
4863         mutex_lock(&dev_priv->drm.struct_mutex);
4864         intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
4865
4866         ret = i915_gem_init_ggtt(dev_priv);
4867         if (ret) {
4868                 GEM_BUG_ON(ret == -EIO);
4869                 goto err_unlock;
4870         }
4871
4872         ret = i915_gem_init_scratch(dev_priv,
4873                                     IS_GEN(dev_priv, 2) ? SZ_256K : PAGE_SIZE);
4874         if (ret) {
4875                 GEM_BUG_ON(ret == -EIO);
4876                 goto err_ggtt;
4877         }
4878
4879         ret = i915_gem_contexts_init(dev_priv);
4880         if (ret) {
4881                 GEM_BUG_ON(ret == -EIO);
4882                 goto err_scratch;
4883         }
4884
4885         ret = intel_engines_init(dev_priv);
4886         if (ret) {
4887                 GEM_BUG_ON(ret == -EIO);
4888                 goto err_context;
4889         }
4890
4891         intel_init_gt_powersave(dev_priv);
4892
4893         ret = intel_uc_init(dev_priv);
4894         if (ret)
4895                 goto err_pm;
4896
4897         ret = i915_gem_init_hw(dev_priv);
4898         if (ret)
4899                 goto err_uc_init;
4900
4901         /*
4902          * Despite its name, intel_init_clock_gating applies both display clock
4903          * gating workarounds and GT mmio workarounds, plus the occasional
4904          * GT power context workaround. Worse, sometimes it includes a context
4905          * register workaround which we need to apply before we record the
4906          * default HW state for all contexts.
4907          *
4908          * FIXME: break up the workarounds and apply them at the right time!
4909          */
4910         intel_init_clock_gating(dev_priv);
4911
4912         ret = __intel_engines_record_defaults(dev_priv);
4913         if (ret)
4914                 goto err_init_hw;
4915
4916         if (i915_inject_load_failure()) {
4917                 ret = -ENODEV;
4918                 goto err_init_hw;
4919         }
4920
4921         if (i915_inject_load_failure()) {
4922                 ret = -EIO;
4923                 goto err_init_hw;
4924         }
4925
4926         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
4927         mutex_unlock(&dev_priv->drm.struct_mutex);
4928
4929         return 0;
4930
4931         /*
4932          * Unwinding is complicated by the fact that we want to handle -EIO
4933          * to mean disable GPU submission but keep KMS alive. We want to mark
4934          * the HW as irreversibly wedged, but keep enough state around that the
4935          * driver doesn't explode during runtime.
4936          */
4937 err_init_hw:
4938         mutex_unlock(&dev_priv->drm.struct_mutex);
4939
4940         i915_gem_suspend(dev_priv);
4941         i915_gem_suspend_late(dev_priv);
4942
4943         i915_gem_drain_workqueue(dev_priv);
4944
4945         mutex_lock(&dev_priv->drm.struct_mutex);
4946         intel_uc_fini_hw(dev_priv);
4947 err_uc_init:
4948         intel_uc_fini(dev_priv);
4949 err_pm:
4950         if (ret != -EIO) {
4951                 intel_cleanup_gt_powersave(dev_priv);
4952                 i915_gem_cleanup_engines(dev_priv);
4953         }
4954 err_context:
4955         if (ret != -EIO)
4956                 i915_gem_contexts_fini(dev_priv);
4957 err_scratch:
4958         i915_gem_fini_scratch(dev_priv);
4959 err_ggtt:
4960 err_unlock:
4961         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
4962         mutex_unlock(&dev_priv->drm.struct_mutex);
4963
4964 err_uc_misc:
4965         intel_uc_fini_misc(dev_priv);
4966
4967         if (ret != -EIO) {
4968                 i915_gem_cleanup_userptr(dev_priv);
4969                 i915_timelines_fini(dev_priv);
4970         }
4971
4972         if (ret == -EIO) {
4973                 mutex_lock(&dev_priv->drm.struct_mutex);
4974
4975                 /*
4976                  * Allow engine initialisation to fail by marking the GPU as
4977                  * wedged. But we only want to do this where the GPU is angry,
4978          * for all other failures, such as an allocation failure, bail.
4979                  */
4980                 if (!i915_reset_failed(dev_priv)) {
4981                         i915_load_error(dev_priv,
4982                                         "Failed to initialize GPU, declaring it wedged!\n");
4983                         i915_gem_set_wedged(dev_priv);
4984                 }
4985
4986                 /* Minimal basic recovery for KMS */
4987                 ret = i915_ggtt_enable_hw(dev_priv);
4988                 i915_gem_restore_gtt_mappings(dev_priv);
4989                 i915_gem_restore_fences(dev_priv);
4990                 intel_init_clock_gating(dev_priv);
4991
4992                 mutex_unlock(&dev_priv->drm.struct_mutex);
4993         }
4994
4995         i915_gem_drain_freed_objects(dev_priv);
4996         return ret;
4997 }
4998
4999 void i915_gem_fini(struct drm_i915_private *dev_priv)
5000 {
5001         i915_gem_suspend_late(dev_priv);
5002         intel_disable_gt_powersave(dev_priv);
5003
5004         /* Flush any outstanding unpin_work. */
5005         i915_gem_drain_workqueue(dev_priv);
5006
5007         mutex_lock(&dev_priv->drm.struct_mutex);
5008         intel_uc_fini_hw(dev_priv);
5009         intel_uc_fini(dev_priv);
5010         i915_gem_cleanup_engines(dev_priv);
5011         i915_gem_contexts_fini(dev_priv);
5012         i915_gem_fini_scratch(dev_priv);
5013         mutex_unlock(&dev_priv->drm.struct_mutex);
5014
5015         intel_wa_list_free(&dev_priv->gt_wa_list);
5016
5017         intel_cleanup_gt_powersave(dev_priv);
5018
5019         intel_uc_fini_misc(dev_priv);
5020         i915_gem_cleanup_userptr(dev_priv);
5021         i915_timelines_fini(dev_priv);
5022
5023         i915_gem_drain_freed_objects(dev_priv);
5024
5025         WARN_ON(!list_empty(&dev_priv->contexts.list));
5026 }
5027
5028 void i915_gem_init_mmio(struct drm_i915_private *i915)
5029 {
5030         i915_gem_sanitize(i915);
5031 }
5032
5033 void
5034 i915_gem_cleanup_engines(struct drm_i915_private *dev_priv)
5035 {
5036         struct intel_engine_cs *engine;
5037         enum intel_engine_id id;
5038
5039         for_each_engine(engine, dev_priv, id)
5040                 dev_priv->gt.cleanup_engine(engine);
5041 }
5042
5043 void
5044 i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
5045 {
5046         int i;
5047
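             /*
              * Per the checks below: 32 fence registers on gen7+ (except
              * Valleyview/Cherryview), 16 on gen4+ and the later gen3 parts
              * (945G/GM, G33, Pineview), and 8 on everything older. When
              * running under a vGPU, the hypervisor tells us how many fences
              * we actually own.
              */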
5048         if (INTEL_GEN(dev_priv) >= 7 && !IS_VALLEYVIEW(dev_priv) &&
5049             !IS_CHERRYVIEW(dev_priv))
5050                 dev_priv->num_fence_regs = 32;
5051         else if (INTEL_GEN(dev_priv) >= 4 ||
5052                  IS_I945G(dev_priv) || IS_I945GM(dev_priv) ||
5053                  IS_G33(dev_priv) || IS_PINEVIEW(dev_priv))
5054                 dev_priv->num_fence_regs = 16;
5055         else
5056                 dev_priv->num_fence_regs = 8;
5057
5058         if (intel_vgpu_active(dev_priv))
5059                 dev_priv->num_fence_regs =
5060                                 I915_READ(vgtif_reg(avail_rs.fence_num));
5061
5062         /* Initialize fence registers to zero */
5063         for (i = 0; i < dev_priv->num_fence_regs; i++) {
5064                 struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
5065
5066                 fence->i915 = dev_priv;
5067                 fence->id = i;
5068                 list_add_tail(&fence->link, &dev_priv->mm.fence_list);
5069         }
5070         i915_gem_restore_fences(dev_priv);
5071
5072         i915_gem_detect_bit_6_swizzle(dev_priv);
5073 }
5074
5075 static void i915_gem_init__mm(struct drm_i915_private *i915)
5076 {
5077         spin_lock_init(&i915->mm.object_stat_lock);
5078         spin_lock_init(&i915->mm.obj_lock);
5079         spin_lock_init(&i915->mm.free_lock);
5080
5081         init_llist_head(&i915->mm.free_list);
5082
5083         INIT_LIST_HEAD(&i915->mm.unbound_list);
5084         INIT_LIST_HEAD(&i915->mm.bound_list);
5085         INIT_LIST_HEAD(&i915->mm.fence_list);
5086         INIT_LIST_HEAD(&i915->mm.userfault_list);
5087
5088         INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
5089 }
5090
5091 int i915_gem_init_early(struct drm_i915_private *dev_priv)
5092 {
5093         int err;
5094
5095         INIT_LIST_HEAD(&dev_priv->gt.active_rings);
5096         INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
5097
5098         i915_gem_init__mm(dev_priv);
5099
5100         INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
5101                           i915_gem_retire_work_handler);
5102         INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
5103                           i915_gem_idle_work_handler);
5104         init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
5105         init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
5106         mutex_init(&dev_priv->gpu_error.wedge_mutex);
5107         init_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu);
5108
5109         atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
5110
5111         spin_lock_init(&dev_priv->fb_tracking.lock);
5112
5113         err = i915_gemfs_init(dev_priv);
5114         if (err)
5115                 DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled (%d).\n", err);
5116
5117         return 0;
5118 }
5119
5120 void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
5121 {
5122         i915_gem_drain_freed_objects(dev_priv);
5123         GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
5124         GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
5125         WARN_ON(dev_priv->mm.object_count);
5126
5127         cleanup_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu);
5128
5129         i915_gemfs_fini(dev_priv);
5130 }
5131
5132 int i915_gem_freeze(struct drm_i915_private *dev_priv)
5133 {
5134         /* Discard all purgeable objects, let userspace recover those as
5135          * required after resuming.
5136          */
5137         i915_gem_shrink_all(dev_priv);
5138
5139         return 0;
5140 }
5141
5142 int i915_gem_freeze_late(struct drm_i915_private *i915)
5143 {
5144         struct drm_i915_gem_object *obj;
5145         struct list_head *phases[] = {
5146                 &i915->mm.unbound_list,
5147                 &i915->mm.bound_list,
5148                 NULL
5149         }, **phase;
5150
5151         /*
5152          * Called just before we write the hibernation image.
5153          *
5154          * We need to update the domain tracking to reflect that the CPU
5155          * will be accessing all the pages to create and restore from the
5156          * hibernation, and so upon restoration those pages will be in the
5157          * CPU domain.
5158          *
5159          * To make sure the hibernation image contains the latest state,
5160          * we update that state just before writing out the image.
5161          *
5162          * To try to reduce the hibernation image, we manually shrink
5163          * the objects as well; see i915_gem_freeze().
5164          */
5165
5166         i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_UNBOUND);
5167         i915_gem_drain_freed_objects(i915);
5168
5169         mutex_lock(&i915->drm.struct_mutex);
5170         for (phase = phases; *phase; phase++) {
5171                 list_for_each_entry(obj, *phase, mm.link)
5172                         WARN_ON(i915_gem_object_set_to_cpu_domain(obj, true));
5173         }
5174         mutex_unlock(&i915->drm.struct_mutex);
5175
5176         return 0;
5177 }
5178
5179 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
5180 {
5181         struct drm_i915_file_private *file_priv = file->driver_priv;
5182         struct i915_request *request;
5183
5184         /* Clean up our request list when the client is going away, so that
5185          * later retire_requests won't dereference our soon-to-be-gone
5186          * file_priv.
5187          */
5188         spin_lock(&file_priv->mm.lock);
5189         list_for_each_entry(request, &file_priv->mm.request_list, client_link)
5190                 request->file_priv = NULL;
5191         spin_unlock(&file_priv->mm.lock);
5192 }
5193
5194 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
5195 {
5196         struct drm_i915_file_private *file_priv;
5197         int ret;
5198
5199         DRM_DEBUG("\n");
5200
5201         file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
5202         if (!file_priv)
5203                 return -ENOMEM;
5204
5205         file->driver_priv = file_priv;
5206         file_priv->dev_priv = i915;
5207         file_priv->file = file;
5208
5209         spin_lock_init(&file_priv->mm.lock);
5210         INIT_LIST_HEAD(&file_priv->mm.request_list);
5211
5212         file_priv->bsd_engine = -1;
5213         file_priv->hang_timestamp = jiffies;
5214
5215         ret = i915_gem_context_open(i915, file);
5216         if (ret)
5217                 kfree(file_priv);
5218
5219         return ret;
5220 }
5221
5222 /**
5223  * i915_gem_track_fb - update frontbuffer tracking
5224  * @old: current GEM buffer for the frontbuffer slots
5225  * @new: new GEM buffer for the frontbuffer slots
5226  * @frontbuffer_bits: bitmask of frontbuffer slots
5227  *
5228  * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
5229  * from @old and setting them in @new. Both @old and @new can be NULL.
5230  */
5231 void i915_gem_track_fb(struct drm_i915_gem_object *old,
5232                        struct drm_i915_gem_object *new,
5233                        unsigned frontbuffer_bits)
5234 {
5235         /* Control of individual bits within the mask is guarded by
5236          * the owning plane->mutex, i.e. we can never see concurrent
5237          * manipulation of individual bits. But since the bitfield as a whole
5238          * is updated using RMW, we need to use atomics in order to update
5239          * the bits.
5240          */
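             /*
              * Each pipe owns INTEL_FRONTBUFFER_BITS_PER_PIPE bits of the
              * mask; the compile-time check below ensures they all fit
              * within a single atomic_t.
              */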
5241         BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
5242                      BITS_PER_TYPE(atomic_t));
5243
5244         if (old) {
5245                 WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
5246                 atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
5247         }
5248
5249         if (new) {
5250                 WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
5251                 atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
5252         }
5253 }
5254
5255 /* Allocate a new GEM object and fill it with the supplied data */
5256 struct drm_i915_gem_object *
5257 i915_gem_object_create_from_data(struct drm_i915_private *dev_priv,
5258                                  const void *data, size_t size)
5259 {
5260         struct drm_i915_gem_object *obj;
5261         struct file *file;
5262         size_t offset;
5263         int err;
5264
5265         obj = i915_gem_object_create(dev_priv, round_up(size, PAGE_SIZE));
5266         if (IS_ERR(obj))
5267                 return obj;
5268
5269         GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);
5270
5271         file = obj->base.filp;
5272         offset = 0;
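             /*
              * Copy the data into the shmem backing store one page at a time
              * using the pagecache write helpers, so that each backing page
              * ends up uptodate and marked dirty in the page cache.
              */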
5273         do {
5274                 unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
5275                 struct page *page;
5276                 void *pgdata, *vaddr;
5277
5278                 err = pagecache_write_begin(file, file->f_mapping,
5279                                             offset, len, 0,
5280                                             &page, &pgdata);
5281                 if (err < 0)
5282                         goto fail;
5283
5284                 vaddr = kmap(page);
5285                 memcpy(vaddr, data, len);
5286                 kunmap(page);
5287
5288                 err = pagecache_write_end(file, file->f_mapping,
5289                                           offset, len, len,
5290                                           page, pgdata);
5291                 if (err < 0)
5292                         goto fail;
5293
5294                 size -= len;
5295                 data += len;
5296                 offset += len;
5297         } while (size);
5298
5299         return obj;
5300
5301 fail:
5302         i915_gem_object_put(obj);
5303         return ERR_PTR(err);
5304 }
5305
5306 struct scatterlist *
5307 i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
5308                        unsigned int n,
5309                        unsigned int *offset)
5310 {
5311         struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
5312         struct scatterlist *sg;
5313         unsigned int idx, count;
5314
5315         might_sleep();
5316         GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
5317         GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
5318
5319         /* As we iterate forward through the sg, we record each entry in a
5320          * radixtree for quick repeated (backwards) lookups. If we have seen
5321          * this index previously, we will have an entry for it.
5322          *
5323          * Initial lookup is O(N), but this is amortized to O(1) for
5324          * sequential page access (where each new request is consecutive
5325          * to the previous one). Repeated lookups are O(lg(obj->base.size)),
5326          * i.e. O(1) with a large constant!
5327          */
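             /*
              * Lookups behind the cached position are served from the
              * radixtree under RCU; lookups at or ahead of it continue the
              * forward walk below under iter->lock, populating the radixtree
              * as we go.
              */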
5328         if (n < READ_ONCE(iter->sg_idx))
5329                 goto lookup;
5330
5331         mutex_lock(&iter->lock);
5332
5333         /* We prefer to reuse the last sg so that repeated lookups of this
5334          * (or the subsequent) sg are fast - comparing against the last
5335          * sg is faster than going through the radixtree.
5336          */
5337
5338         sg = iter->sg_pos;
5339         idx = iter->sg_idx;
5340         count = __sg_page_count(sg);
5341
5342         while (idx + count <= n) {
5343                 void *entry;
5344                 unsigned long i;
5345                 int ret;
5346
5347                 /* If we cannot allocate and insert this entry, or the
5348                  * individual pages from this range, cancel updating the
5349                  * sg_idx so that on this lookup we are forced to linearly
5350                  * scan onwards, but on future lookups we will try the
5351                  * insertion again (in which case we need to be careful of
5352                  * the error return reporting that we have already inserted
5353                  * this index).
5354                  */
5355                 ret = radix_tree_insert(&iter->radix, idx, sg);
5356                 if (ret && ret != -EEXIST)
5357                         goto scan;
5358
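                     /*
                      * Intermediate pages of a multi-page sg entry are stored
                      * as value entries encoding the index of the first page,
                      * so a later lookup can recover both the sg and the
                      * offset within it.
                      */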
5359                 entry = xa_mk_value(idx);
5360                 for (i = 1; i < count; i++) {
5361                         ret = radix_tree_insert(&iter->radix, idx + i, entry);
5362                         if (ret && ret != -EEXIST)
5363                                 goto scan;
5364                 }
5365
5366                 idx += count;
5367                 sg = ____sg_next(sg);
5368                 count = __sg_page_count(sg);
5369         }
5370
5371 scan:
5372         iter->sg_pos = sg;
5373         iter->sg_idx = idx;
5374
5375         mutex_unlock(&iter->lock);
5376
5377         if (unlikely(n < idx)) /* insertion completed by another thread */
5378                 goto lookup;
5379
5380         /* In case we failed to insert the entry into the radixtree, we need
5381          * to look beyond the current sg.
5382          */
5383         while (idx + count <= n) {
5384                 idx += count;
5385                 sg = ____sg_next(sg);
5386                 count = __sg_page_count(sg);
5387         }
5388
5389         *offset = n - idx;
5390         return sg;
5391
5392 lookup:
5393         rcu_read_lock();
5394
5395         sg = radix_tree_lookup(&iter->radix, n);
5396         GEM_BUG_ON(!sg);
5397
5398         /* If this index is in the middle of a multi-page sg entry,
5399          * the radix tree will contain a value entry that points
5400          * to the start of that range. We will return the pointer to
5401          * the base page and the offset of this page within the
5402          * sg entry's range.
5403          */
5404         *offset = 0;
5405         if (unlikely(xa_is_value(sg))) {
5406                 unsigned long base = xa_to_value(sg);
5407
5408                 sg = radix_tree_lookup(&iter->radix, base);
5409                 GEM_BUG_ON(!sg);
5410
5411                 *offset = n - base;
5412         }
5413
5414         rcu_read_unlock();
5415
5416         return sg;
5417 }
5418
5419 struct page *
5420 i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
5421 {
5422         struct scatterlist *sg;
5423         unsigned int offset;
5424
5425         GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
5426
5427         sg = i915_gem_object_get_sg(obj, n, &offset);
5428         return nth_page(sg_page(sg), offset);
5429 }
5430
5431 /* Like i915_gem_object_get_page(), but mark the returned page dirty */
5432 struct page *
5433 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
5434                                unsigned int n)
5435 {
5436         struct page *page;
5437
5438         page = i915_gem_object_get_page(obj, n);
5439         if (!obj->mm.dirty)
5440                 set_page_dirty(page);
5441
5442         return page;
5443 }
5444
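     /* Like i915_gem_object_get_page(), but return the page's DMA address */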
5445 dma_addr_t
5446 i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
5447                                 unsigned long n)
5448 {
5449         struct scatterlist *sg;
5450         unsigned int offset;
5451
5452         sg = i915_gem_object_get_sg(obj, n, &offset);
5453         return sg_dma_address(sg) + (offset << PAGE_SHIFT);
5454 }
5455
5456 int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
5457 {
5458         struct sg_table *pages;
5459         int err;
5460
5461         if (align > obj->base.size)
5462                 return -EINVAL;
5463
5464         if (obj->ops == &i915_gem_phys_ops)
5465                 return 0;
5466
5467         if (obj->ops != &i915_gem_object_ops)
5468                 return -EINVAL;
5469
5470         err = i915_gem_object_unbind(obj);
5471         if (err)
5472                 return err;
5473
5474         mutex_lock(&obj->mm.lock);
5475
5476         if (obj->mm.madv != I915_MADV_WILLNEED) {
5477                 err = -EFAULT;
5478                 goto err_unlock;
5479         }
5480
5481         if (obj->mm.quirked) {
5482                 err = -EFAULT;
5483                 goto err_unlock;
5484         }
5485
5486         if (obj->mm.mapping) {
5487                 err = -EBUSY;
5488                 goto err_unlock;
5489         }
5490
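             /*
              * Detach the current shmem page set, switch the object over to
              * the phys backing store and acquire the replacement pages; on
              * failure, the error path below restores the original ops and
              * pages.
              */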
5491         pages = __i915_gem_object_unset_pages(obj);
5492
5493         obj->ops = &i915_gem_phys_ops;
5494
5495         err = ____i915_gem_object_get_pages(obj);
5496         if (err)
5497                 goto err_xfer;
5498
5499         /* Perma-pin (until release) the physical set of pages */
5500         __i915_gem_object_pin_pages(obj);
5501
5502         if (!IS_ERR_OR_NULL(pages))
5503                 i915_gem_object_ops.put_pages(obj, pages);
5504         mutex_unlock(&obj->mm.lock);
5505         return 0;
5506
5507 err_xfer:
5508         obj->ops = &i915_gem_object_ops;
5509         if (!IS_ERR_OR_NULL(pages)) {
5510                 unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl);
5511
5512                 __i915_gem_object_set_pages(obj, pages, sg_page_sizes);
5513         }
5514 err_unlock:
5515         mutex_unlock(&obj->mm.lock);
5516         return err;
5517 }
5518
5519 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
5520 #include "selftests/scatterlist.c"
5521 #include "selftests/mock_gem_device.c"
5522 #include "selftests/huge_gem_object.c"
5523 #include "selftests/huge_pages.c"
5524 #include "selftests/i915_gem_object.c"
5525 #include "selftests/i915_gem_coherency.c"
5526 #include "selftests/i915_gem.c"
5527 #endif