drivers/gpu/drm/i915/i915_gem_gtt.c
1 /*
2  * Copyright © 2010 Daniel Vetter
3  * Copyright © 2011-2014 Intel Corporation
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  *
24  */
25
26 #include <linux/slab.h> /* fault-inject.h is not standalone! */
27
28 #include <linux/fault-inject.h>
29 #include <linux/log2.h>
30 #include <linux/random.h>
31 #include <linux/seq_file.h>
32 #include <linux/stop_machine.h>
33
34 #include <asm/set_memory.h>
35
36 #include <drm/i915_drm.h>
37
38 #include "i915_drv.h"
39 #include "i915_vgpu.h"
40 #include "i915_reset.h"
41 #include "i915_trace.h"
42 #include "intel_drv.h"
43 #include "intel_frontbuffer.h"
44
45 #define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
46
47 /**
48  * DOC: Global GTT views
49  *
50  * Background and previous state
51  *
52  * Historically objects could exist (be bound) in global GTT space only as
53  * singular instances with a view representing all of the object's backing pages
54  * in a linear fashion. This view is called the normal view.
55  *
56  * To support multiple views of the same object, where the number of mapped
57  * pages is not equal to the backing store, or where the layout of the pages
58  * is not linear, the concept of a GGTT view was added.
59  *
60  * One example of an alternative view is a stereo display driven by a single
61  * image. In this case we would have a framebuffer looking like this
62  * (2x2 pages):
63  *
64  *    12
65  *    34
66  *
67  * The above represents a normal GGTT view as normally mapped for GPU or CPU
68  * rendering. In contrast, the display engine would be fed an alternative
69  * view, which could look something like this:
70  *
71  *   1212
72  *   3434
73  *
74  * In this example both the size and layout of pages in the alternative view are
75  * different from the normal view.
76  *
77  * Implementation and usage
78  *
79  * GGTT views are implemented using VMAs and are distinguished via enum
80  * i915_ggtt_view_type and struct i915_ggtt_view.
81  *
82  * A new flavour of core GEM functions, which work with GGTT-bound objects, was
83  * added with the _ggtt_ infix, and sometimes with a _view postfix, to avoid
84  * renaming in large amounts of code. They take the struct i915_ggtt_view
85  * parameter encapsulating all metadata required to implement a view.
86  *
87  * As a helper for callers which are only interested in the normal view, a
88  * globally const i915_ggtt_view_normal singleton instance exists. All old core
89  * GEM API functions, the ones not taking the view parameter, operate on, or
90  * with, the normal GGTT view.
91  *
92  * Code wanting to add or use a new GGTT view needs to:
93  *
94  * 1. Add a new enum with a suitable name.
95  * 2. Extend the metadata in the i915_ggtt_view structure if required.
96  * 3. Add support to i915_get_ggtt_vma_pages().
97  *
98  * New views are required to build a scatter-gather table from within the
99  * i915_get_ggtt_vma_pages() function. This table is stored in the
100  * vma.ggtt_view and exists for the lifetime of a VMA.
101  *
102  * Core API is designed to have copy semantics which means that passed in
103  * struct i915_ggtt_view does not need to be persistent (left around after
104  * calling the core API functions).
105  *
106  */
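/*
 * For example, a caller wanting the rotated view of a framebuffer object
 * might do something like the following (a minimal sketch, not taken from
 * this file; the rotated-view metadata is struct intel_rotation_info):
 *
 *	struct i915_ggtt_view view = { .type = I915_GGTT_VIEW_ROTATED };
 *	struct i915_vma *vma;
 *
 *	... fill in view.rotated from the framebuffer layout ...
 *
 *	vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
 *	if (IS_ERR(vma))
 *		return PTR_ERR(vma);
 *
 * Because of the copy semantics described above, the local view does not
 * need to outlive the call.
 */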
107
108 static int
109 i915_get_ggtt_vma_pages(struct i915_vma *vma);
110
111 static void gen6_ggtt_invalidate(struct drm_i915_private *dev_priv)
112 {
113         /*
114          * Note that as an uncached mmio write, this will flush the
115          * WCB of the writes into the GGTT before it triggers the invalidate.
116          */
117         I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
118 }
119
120 static void guc_ggtt_invalidate(struct drm_i915_private *dev_priv)
121 {
122         gen6_ggtt_invalidate(dev_priv);
123         I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
124 }
125
126 static void gmch_ggtt_invalidate(struct drm_i915_private *dev_priv)
127 {
128         intel_gtt_chipset_flush();
129 }
130
131 static inline void i915_ggtt_invalidate(struct drm_i915_private *i915)
132 {
133         i915->ggtt.invalidate(i915);
134 }
135
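/*
 * Bind a VMA into a ppgtt. If the range has not been bound locally before
 * (I915_VMA_LOCAL_BIND is not yet set), the page-table backing for the VMA's
 * range is allocated first, then the PTEs are written. Read-only objects get
 * PTE_READ_ONLY where the hardware supports it (VLV, gen8+).
 */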
136 static int ppgtt_bind_vma(struct i915_vma *vma,
137                           enum i915_cache_level cache_level,
138                           u32 unused)
139 {
140         u32 pte_flags;
141         int err;
142
143         if (!(vma->flags & I915_VMA_LOCAL_BIND)) {
144                 err = vma->vm->allocate_va_range(vma->vm,
145                                                  vma->node.start, vma->size);
146                 if (err)
147                         return err;
148         }
149
150         /* Applicable to VLV, and gen8+ */
151         pte_flags = 0;
152         if (i915_gem_object_is_readonly(vma->obj))
153                 pte_flags |= PTE_READ_ONLY;
154
155         vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
156
157         return 0;
158 }
159
160 static void ppgtt_unbind_vma(struct i915_vma *vma)
161 {
162         vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
163 }
164
165 static int ppgtt_set_pages(struct i915_vma *vma)
166 {
167         GEM_BUG_ON(vma->pages);
168
169         vma->pages = vma->obj->mm.pages;
170
171         vma->page_sizes = vma->obj->mm.page_sizes;
172
173         return 0;
174 }
175
176 static void clear_pages(struct i915_vma *vma)
177 {
178         GEM_BUG_ON(!vma->pages);
179
180         if (vma->pages != vma->obj->mm.pages) {
181                 sg_free_table(vma->pages);
182                 kfree(vma->pages);
183         }
184         vma->pages = NULL;
185
186         memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
187 }
188
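/*
 * Build a gen8 PTE from a DMA address, cache level and flags: the present
 * and RW bits are set by default, PTE_READ_ONLY clears RW, and the cache
 * level selects a PPAT attribute (uncached, display/eLLC write-through or
 * fully cached).
 */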
189 static u64 gen8_pte_encode(dma_addr_t addr,
190                            enum i915_cache_level level,
191                            u32 flags)
192 {
193         gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW;
194
195         if (unlikely(flags & PTE_READ_ONLY))
196                 pte &= ~_PAGE_RW;
197
198         switch (level) {
199         case I915_CACHE_NONE:
200                 pte |= PPAT_UNCACHED;
201                 break;
202         case I915_CACHE_WT:
203                 pte |= PPAT_DISPLAY_ELLC;
204                 break;
205         default:
206                 pte |= PPAT_CACHED;
207                 break;
208         }
209
210         return pte;
211 }
212
213 static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
214                                   const enum i915_cache_level level)
215 {
216         gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
217         pde |= addr;
218         if (level != I915_CACHE_NONE)
219                 pde |= PPAT_CACHED_PDE;
220         else
221                 pde |= PPAT_UNCACHED;
222         return pde;
223 }
224
225 #define gen8_pdpe_encode gen8_pde_encode
226 #define gen8_pml4e_encode gen8_pde_encode
227
228 static u64 snb_pte_encode(dma_addr_t addr,
229                           enum i915_cache_level level,
230                           u32 flags)
231 {
232         gen6_pte_t pte = GEN6_PTE_VALID;
233         pte |= GEN6_PTE_ADDR_ENCODE(addr);
234
235         switch (level) {
236         case I915_CACHE_L3_LLC:
237         case I915_CACHE_LLC:
238                 pte |= GEN6_PTE_CACHE_LLC;
239                 break;
240         case I915_CACHE_NONE:
241                 pte |= GEN6_PTE_UNCACHED;
242                 break;
243         default:
244                 MISSING_CASE(level);
245         }
246
247         return pte;
248 }
249
250 static u64 ivb_pte_encode(dma_addr_t addr,
251                           enum i915_cache_level level,
252                           u32 flags)
253 {
254         gen6_pte_t pte = GEN6_PTE_VALID;
255         pte |= GEN6_PTE_ADDR_ENCODE(addr);
256
257         switch (level) {
258         case I915_CACHE_L3_LLC:
259                 pte |= GEN7_PTE_CACHE_L3_LLC;
260                 break;
261         case I915_CACHE_LLC:
262                 pte |= GEN6_PTE_CACHE_LLC;
263                 break;
264         case I915_CACHE_NONE:
265                 pte |= GEN6_PTE_UNCACHED;
266                 break;
267         default:
268                 MISSING_CASE(level);
269         }
270
271         return pte;
272 }
273
274 static u64 byt_pte_encode(dma_addr_t addr,
275                           enum i915_cache_level level,
276                           u32 flags)
277 {
278         gen6_pte_t pte = GEN6_PTE_VALID;
279         pte |= GEN6_PTE_ADDR_ENCODE(addr);
280
281         if (!(flags & PTE_READ_ONLY))
282                 pte |= BYT_PTE_WRITEABLE;
283
284         if (level != I915_CACHE_NONE)
285                 pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
286
287         return pte;
288 }
289
290 static u64 hsw_pte_encode(dma_addr_t addr,
291                           enum i915_cache_level level,
292                           u32 flags)
293 {
294         gen6_pte_t pte = GEN6_PTE_VALID;
295         pte |= HSW_PTE_ADDR_ENCODE(addr);
296
297         if (level != I915_CACHE_NONE)
298                 pte |= HSW_WB_LLC_AGE3;
299
300         return pte;
301 }
302
303 static u64 iris_pte_encode(dma_addr_t addr,
304                            enum i915_cache_level level,
305                            u32 flags)
306 {
307         gen6_pte_t pte = GEN6_PTE_VALID;
308         pte |= HSW_PTE_ADDR_ENCODE(addr);
309
310         switch (level) {
311         case I915_CACHE_NONE:
312                 break;
313         case I915_CACHE_WT:
314                 pte |= HSW_WT_ELLC_LLC_AGE3;
315                 break;
316         default:
317                 pte |= HSW_WB_ELLC_LLC_AGE3;
318                 break;
319         }
320
321         return pte;
322 }
323
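/*
 * The pagestash helpers below maintain small spinlock-protected caches of
 * pages for page-table allocations. On platforms that map page tables as
 * write-combining (vm->pt_kmap_wc), switching pages between WB and WC is
 * expensive, so spare WC pages are stashed (per-vm and globally in
 * i915->mm.wc_stash) for reuse rather than being freed immediately.
 */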
324 static void stash_init(struct pagestash *stash)
325 {
326         pagevec_init(&stash->pvec);
327         spin_lock_init(&stash->lock);
328 }
329
330 static struct page *stash_pop_page(struct pagestash *stash)
331 {
332         struct page *page = NULL;
333
334         spin_lock(&stash->lock);
335         if (likely(stash->pvec.nr))
336                 page = stash->pvec.pages[--stash->pvec.nr];
337         spin_unlock(&stash->lock);
338
339         return page;
340 }
341
342 static void stash_push_pagevec(struct pagestash *stash, struct pagevec *pvec)
343 {
344         int nr;
345
346         spin_lock_nested(&stash->lock, SINGLE_DEPTH_NESTING);
347
348         nr = min_t(int, pvec->nr, pagevec_space(&stash->pvec));
349         memcpy(stash->pvec.pages + stash->pvec.nr,
350                pvec->pages + pvec->nr - nr,
351                sizeof(pvec->pages[0]) * nr);
352         stash->pvec.nr += nr;
353
354         spin_unlock(&stash->lock);
355
356         pvec->nr -= nr;
357 }
358
359 static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
360 {
361         struct pagevec stack;
362         struct page *page;
363
364         if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
365                 i915_gem_shrink_all(vm->i915);
366
367         page = stash_pop_page(&vm->free_pages);
368         if (page)
369                 return page;
370
371         if (!vm->pt_kmap_wc)
372                 return alloc_page(gfp);
373
374         /* Look in our global stash of WC pages... */
375         page = stash_pop_page(&vm->i915->mm.wc_stash);
376         if (page)
377                 return page;
378
379         /*
380          * Otherwise batch allocate pages to amortize cost of set_pages_wc.
381          *
382          * We have to be careful as page allocation may trigger the shrinker
383          * (via direct reclaim) which will fill up the WC stash underneath us.
384          * So we add our WB pages into a temporary pvec on the stack and merge
385          * them into the WC stash after all the allocations are complete.
386          */
387         pagevec_init(&stack);
388         do {
389                 struct page *page;
390
391                 page = alloc_page(gfp);
392                 if (unlikely(!page))
393                         break;
394
395                 stack.pages[stack.nr++] = page;
396         } while (pagevec_space(&stack));
397
398         if (stack.nr && !set_pages_array_wc(stack.pages, stack.nr)) {
399                 page = stack.pages[--stack.nr];
400
401                 /* Merge spare WC pages to the global stash */
402                 stash_push_pagevec(&vm->i915->mm.wc_stash, &stack);
403
404                 /* Push any surplus WC pages onto the local VM stash */
405                 if (stack.nr)
406                         stash_push_pagevec(&vm->free_pages, &stack);
407         }
408
409         /* Return unwanted leftovers */
410         if (unlikely(stack.nr)) {
411                 WARN_ON_ONCE(set_pages_array_wb(stack.pages, stack.nr));
412                 __pagevec_release(&stack);
413         }
414
415         return page;
416 }
417
418 static void vm_free_pages_release(struct i915_address_space *vm,
419                                   bool immediate)
420 {
421         struct pagevec *pvec = &vm->free_pages.pvec;
422         struct pagevec stack;
423
424         lockdep_assert_held(&vm->free_pages.lock);
425         GEM_BUG_ON(!pagevec_count(pvec));
426
427         if (vm->pt_kmap_wc) {
428                 /*
429                  * When we use WC, first fill up the global stash and then,
430                  * only if that is full, immediately free the overflow.
431                  */
432                 stash_push_pagevec(&vm->i915->mm.wc_stash, pvec);
433
434                 /*
435                  * As we have made some room in the VM's free_pages,
436                  * we can wait for it to fill again. Unless we are
437                  * inside i915_address_space_fini() and must
438                  * immediately release the pages!
439                  */
440                 if (pvec->nr <= (immediate ? 0 : PAGEVEC_SIZE - 1))
441                         return;
442
443                 /*
444                  * We have to drop the lock to allow ourselves to sleep,
445                  * so take a copy of the pvec and clear the stash for
446                  * others to use it as we sleep.
447                  */
448                 stack = *pvec;
449                 pagevec_reinit(pvec);
450                 spin_unlock(&vm->free_pages.lock);
451
452                 pvec = &stack;
453                 set_pages_array_wb(pvec->pages, pvec->nr);
454
455                 spin_lock(&vm->free_pages.lock);
456         }
457
458         __pagevec_release(pvec);
459 }
460
461 static void vm_free_page(struct i915_address_space *vm, struct page *page)
462 {
463         /*
464          * On !llc, we need to change the pages back to WB. We only do so
465          * in bulk, so we rarely need to change the page attributes here,
466          * but doing so requires a stop_machine() from deep inside arch/x86/mm.
467          * To make detection of the possible sleep more likely, use an
468          * unconditional might_sleep() for everybody.
469          */
470         might_sleep();
471         spin_lock(&vm->free_pages.lock);
472         if (!pagevec_add(&vm->free_pages.pvec, page))
473                 vm_free_pages_release(vm, false);
474         spin_unlock(&vm->free_pages.lock);
475 }
476
477 static void i915_address_space_init(struct i915_address_space *vm, int subclass)
478 {
479         /*
480          * The vm->mutex must be reclaim safe (for use in the shrinker).
481          * Do a dummy acquire now under fs_reclaim so that any allocation
482          * attempt holding the lock is immediately reported by lockdep.
483          */
484         mutex_init(&vm->mutex);
485         lockdep_set_subclass(&vm->mutex, subclass);
486         i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
487
488         GEM_BUG_ON(!vm->total);
489         drm_mm_init(&vm->mm, 0, vm->total);
490         vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
491
492         stash_init(&vm->free_pages);
493
494         INIT_LIST_HEAD(&vm->unbound_list);
495         INIT_LIST_HEAD(&vm->bound_list);
496 }
497
498 static void i915_address_space_fini(struct i915_address_space *vm)
499 {
500         spin_lock(&vm->free_pages.lock);
501         if (pagevec_count(&vm->free_pages.pvec))
502                 vm_free_pages_release(vm, true);
503         GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec));
504         spin_unlock(&vm->free_pages.lock);
505
506         drm_mm_takedown(&vm->mm);
507
508         mutex_destroy(&vm->mutex);
509 }
510
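/*
 * Allocate a single page for page-table use (preferring the stashes above)
 * and map it for bidirectional DMA; returns -ENOMEM if either step fails.
 */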
511 static int __setup_page_dma(struct i915_address_space *vm,
512                             struct i915_page_dma *p,
513                             gfp_t gfp)
514 {
515         p->page = vm_alloc_page(vm, gfp | I915_GFP_ALLOW_FAIL);
516         if (unlikely(!p->page))
517                 return -ENOMEM;
518
519         p->daddr = dma_map_page_attrs(vm->dma,
520                                       p->page, 0, PAGE_SIZE,
521                                       PCI_DMA_BIDIRECTIONAL,
522                                       DMA_ATTR_SKIP_CPU_SYNC |
523                                       DMA_ATTR_NO_WARN);
524         if (unlikely(dma_mapping_error(vm->dma, p->daddr))) {
525                 vm_free_page(vm, p->page);
526                 return -ENOMEM;
527         }
528
529         return 0;
530 }
531
532 static int setup_page_dma(struct i915_address_space *vm,
533                           struct i915_page_dma *p)
534 {
535         return __setup_page_dma(vm, p, __GFP_HIGHMEM);
536 }
537
538 static void cleanup_page_dma(struct i915_address_space *vm,
539                              struct i915_page_dma *p)
540 {
541         dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
542         vm_free_page(vm, p->page);
543 }
544
545 #define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)
546
547 #define setup_px(vm, px) setup_page_dma((vm), px_base(px))
548 #define cleanup_px(vm, px) cleanup_page_dma((vm), px_base(px))
549 #define fill_px(vm, px, v) fill_page_dma((vm), px_base(px), (v))
550 #define fill32_px(vm, px, v) fill_page_dma_32((vm), px_base(px), (v))
551
552 static void fill_page_dma(struct i915_address_space *vm,
553                           struct i915_page_dma *p,
554                           const u64 val)
555 {
556         u64 * const vaddr = kmap_atomic(p->page);
557
558         memset64(vaddr, val, PAGE_SIZE / sizeof(val));
559
560         kunmap_atomic(vaddr);
561 }
562
563 static void fill_page_dma_32(struct i915_address_space *vm,
564                              struct i915_page_dma *p,
565                              const u32 v)
566 {
567         fill_page_dma(vm, p, (u64)v << 32 | v);
568 }
569
570 static int
571 setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
572 {
573         unsigned long size;
574
575         /*
576          * In order to utilize 64K pages for an object with a size < 2M, we will
577          * need to support a 64K scratch page, given that every 16th entry for a
578          * page-table operating in 64K mode must point to a properly aligned 64K
579          * region, including any PTEs which happen to point to scratch.
580          *
581          * This is only relevant for the 48b PPGTT where we support
582          * huge-gtt-pages, see also i915_vma_insert(). However, as we share the
583          * scratch (read-only) between all vm, we create one 64k scratch page
584          * for all.
585          */
586         size = I915_GTT_PAGE_SIZE_4K;
587         if (i915_vm_is_4lvl(vm) &&
588             HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) {
589                 size = I915_GTT_PAGE_SIZE_64K;
590                 gfp |= __GFP_NOWARN;
591         }
592         gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL;
593
594         do {
595                 int order = get_order(size);
596                 struct page *page;
597                 dma_addr_t addr;
598
599                 page = alloc_pages(gfp, order);
600                 if (unlikely(!page))
601                         goto skip;
602
603                 addr = dma_map_page_attrs(vm->dma,
604                                           page, 0, size,
605                                           PCI_DMA_BIDIRECTIONAL,
606                                           DMA_ATTR_SKIP_CPU_SYNC |
607                                           DMA_ATTR_NO_WARN);
608                 if (unlikely(dma_mapping_error(vm->dma, addr)))
609                         goto free_page;
610
611                 if (unlikely(!IS_ALIGNED(addr, size)))
612                         goto unmap_page;
613
614                 vm->scratch_page.page = page;
615                 vm->scratch_page.daddr = addr;
616                 vm->scratch_order = order;
617                 return 0;
618
619 unmap_page:
620                 dma_unmap_page(vm->dma, addr, size, PCI_DMA_BIDIRECTIONAL);
621 free_page:
622                 __free_pages(page, order);
623 skip:
624                 if (size == I915_GTT_PAGE_SIZE_4K)
625                         return -ENOMEM;
626
627                 size = I915_GTT_PAGE_SIZE_4K;
628                 gfp &= ~__GFP_NOWARN;
629         } while (1);
630 }
631
632 static void cleanup_scratch_page(struct i915_address_space *vm)
633 {
634         struct i915_page_dma *p = &vm->scratch_page;
635         int order = vm->scratch_order;
636
637         dma_unmap_page(vm->dma, p->daddr, BIT(order) << PAGE_SHIFT,
638                        PCI_DMA_BIDIRECTIONAL);
639         __free_pages(p->page, order);
640 }
641
642 static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
643 {
644         struct i915_page_table *pt;
645
646         pt = kmalloc(sizeof(*pt), I915_GFP_ALLOW_FAIL);
647         if (unlikely(!pt))
648                 return ERR_PTR(-ENOMEM);
649
650         if (unlikely(setup_px(vm, pt))) {
651                 kfree(pt);
652                 return ERR_PTR(-ENOMEM);
653         }
654
655         pt->used_ptes = 0;
656         return pt;
657 }
658
659 static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt)
660 {
661         cleanup_px(vm, pt);
662         kfree(pt);
663 }
664
665 static void gen8_initialize_pt(struct i915_address_space *vm,
666                                struct i915_page_table *pt)
667 {
668         fill_px(vm, pt, vm->scratch_pte);
669 }
670
671 static void gen6_initialize_pt(struct i915_address_space *vm,
672                                struct i915_page_table *pt)
673 {
674         fill32_px(vm, pt, vm->scratch_pte);
675 }
676
677 static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
678 {
679         struct i915_page_directory *pd;
680
681         pd = kzalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL);
682         if (unlikely(!pd))
683                 return ERR_PTR(-ENOMEM);
684
685         if (unlikely(setup_px(vm, pd))) {
686                 kfree(pd);
687                 return ERR_PTR(-ENOMEM);
688         }
689
690         pd->used_pdes = 0;
691         return pd;
692 }
693
694 static void free_pd(struct i915_address_space *vm,
695                     struct i915_page_directory *pd)
696 {
697         cleanup_px(vm, pd);
698         kfree(pd);
699 }
700
701 static void gen8_initialize_pd(struct i915_address_space *vm,
702                                struct i915_page_directory *pd)
703 {
704         fill_px(vm, pd,
705                 gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC));
706         memset_p((void **)pd->page_table, vm->scratch_pt, I915_PDES);
707 }
708
709 static int __pdp_init(struct i915_address_space *vm,
710                       struct i915_page_directory_pointer *pdp)
711 {
712         const unsigned int pdpes = i915_pdpes_per_pdp(vm);
713
714         pdp->page_directory = kmalloc_array(pdpes, sizeof(*pdp->page_directory),
715                                             I915_GFP_ALLOW_FAIL);
716         if (unlikely(!pdp->page_directory))
717                 return -ENOMEM;
718
719         memset_p((void **)pdp->page_directory, vm->scratch_pd, pdpes);
720
721         return 0;
722 }
723
724 static void __pdp_fini(struct i915_page_directory_pointer *pdp)
725 {
726         kfree(pdp->page_directory);
727         pdp->page_directory = NULL;
728 }
729
730 static struct i915_page_directory_pointer *
731 alloc_pdp(struct i915_address_space *vm)
732 {
733         struct i915_page_directory_pointer *pdp;
734         int ret = -ENOMEM;
735
736         GEM_BUG_ON(!i915_vm_is_4lvl(vm));
737
738         pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
739         if (!pdp)
740                 return ERR_PTR(-ENOMEM);
741
742         ret = __pdp_init(vm, pdp);
743         if (ret)
744                 goto fail_bitmap;
745
746         ret = setup_px(vm, pdp);
747         if (ret)
748                 goto fail_page_m;
749
750         return pdp;
751
752 fail_page_m:
753         __pdp_fini(pdp);
754 fail_bitmap:
755         kfree(pdp);
756
757         return ERR_PTR(ret);
758 }
759
760 static void free_pdp(struct i915_address_space *vm,
761                      struct i915_page_directory_pointer *pdp)
762 {
763         __pdp_fini(pdp);
764
765         if (!i915_vm_is_4lvl(vm))
766                 return;
767
768         cleanup_px(vm, pdp);
769         kfree(pdp);
770 }
771
772 static void gen8_initialize_pdp(struct i915_address_space *vm,
773                                 struct i915_page_directory_pointer *pdp)
774 {
775         gen8_ppgtt_pdpe_t scratch_pdpe;
776
777         scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
778
779         fill_px(vm, pdp, scratch_pdpe);
780 }
781
782 static void gen8_initialize_pml4(struct i915_address_space *vm,
783                                  struct i915_pml4 *pml4)
784 {
785         fill_px(vm, pml4,
786                 gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC));
787         memset_p((void **)pml4->pdps, vm->scratch_pdp, GEN8_PML4ES_PER_PML4);
788 }
789
790 /*
791  * PDE TLBs are a pain to invalidate on GEN8+. When we modify
792  * the page table structures, we mark them dirty so that
793  * context switching/execlist queuing code takes extra steps
794  * to ensure that tlbs are flushed.
795  */
796 static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
797 {
798         ppgtt->pd_dirty_engines = ALL_ENGINES;
799 }
800
801 /* Removes entries from a single page table, releasing it if it's empty.
802  * Caller can use the return value to update higher-level entries.
803  */
804 static bool gen8_ppgtt_clear_pt(const struct i915_address_space *vm,
805                                 struct i915_page_table *pt,
806                                 u64 start, u64 length)
807 {
808         unsigned int num_entries = gen8_pte_count(start, length);
809         gen8_pte_t *vaddr;
810
811         GEM_BUG_ON(num_entries > pt->used_ptes);
812
813         pt->used_ptes -= num_entries;
814         if (!pt->used_ptes)
815                 return true;
816
817         vaddr = kmap_atomic_px(pt);
818         memset64(vaddr + gen8_pte_index(start), vm->scratch_pte, num_entries);
819         kunmap_atomic(vaddr);
820
821         return false;
822 }
823
824 static void gen8_ppgtt_set_pde(struct i915_address_space *vm,
825                                struct i915_page_directory *pd,
826                                struct i915_page_table *pt,
827                                unsigned int pde)
828 {
829         gen8_pde_t *vaddr;
830
831         pd->page_table[pde] = pt;
832
833         vaddr = kmap_atomic_px(pd);
834         vaddr[pde] = gen8_pde_encode(px_dma(pt), I915_CACHE_LLC);
835         kunmap_atomic(vaddr);
836 }
837
838 static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
839                                 struct i915_page_directory *pd,
840                                 u64 start, u64 length)
841 {
842         struct i915_page_table *pt;
843         u32 pde;
844
845         gen8_for_each_pde(pt, pd, start, length, pde) {
846                 GEM_BUG_ON(pt == vm->scratch_pt);
847
848                 if (!gen8_ppgtt_clear_pt(vm, pt, start, length))
849                         continue;
850
851                 gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde);
852                 GEM_BUG_ON(!pd->used_pdes);
853                 pd->used_pdes--;
854
855                 free_pt(vm, pt);
856         }
857
858         return !pd->used_pdes;
859 }
860
861 static void gen8_ppgtt_set_pdpe(struct i915_address_space *vm,
862                                 struct i915_page_directory_pointer *pdp,
863                                 struct i915_page_directory *pd,
864                                 unsigned int pdpe)
865 {
866         gen8_ppgtt_pdpe_t *vaddr;
867
868         pdp->page_directory[pdpe] = pd;
869         if (!i915_vm_is_4lvl(vm))
870                 return;
871
872         vaddr = kmap_atomic_px(pdp);
873         vaddr[pdpe] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
874         kunmap_atomic(vaddr);
875 }
876
877 /* Removes entries from a single page dir pointer, releasing it if it's empty.
878  * Caller can use the return value to update higher-level entries
879  */
880 static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
881                                  struct i915_page_directory_pointer *pdp,
882                                  u64 start, u64 length)
883 {
884         struct i915_page_directory *pd;
885         unsigned int pdpe;
886
887         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
888                 GEM_BUG_ON(pd == vm->scratch_pd);
889
890                 if (!gen8_ppgtt_clear_pd(vm, pd, start, length))
891                         continue;
892
893                 gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
894                 GEM_BUG_ON(!pdp->used_pdpes);
895                 pdp->used_pdpes--;
896
897                 free_pd(vm, pd);
898         }
899
900         return !pdp->used_pdpes;
901 }
902
903 static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm,
904                                   u64 start, u64 length)
905 {
906         gen8_ppgtt_clear_pdp(vm, &i915_vm_to_ppgtt(vm)->pdp, start, length);
907 }
908
909 static void gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4,
910                                  struct i915_page_directory_pointer *pdp,
911                                  unsigned int pml4e)
912 {
913         gen8_ppgtt_pml4e_t *vaddr;
914
915         pml4->pdps[pml4e] = pdp;
916
917         vaddr = kmap_atomic_px(pml4);
918         vaddr[pml4e] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
919         kunmap_atomic(vaddr);
920 }
921
922 /* Removes entries from a single pml4.
923  * This is the top-level structure in 4-level page tables used on gen8+.
924  * Empty entries are always scratch pml4e.
925  */
926 static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
927                                   u64 start, u64 length)
928 {
929         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
930         struct i915_pml4 *pml4 = &ppgtt->pml4;
931         struct i915_page_directory_pointer *pdp;
932         unsigned int pml4e;
933
934         GEM_BUG_ON(!i915_vm_is_4lvl(vm));
935
936         gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
937                 GEM_BUG_ON(pdp == vm->scratch_pdp);
938
939                 if (!gen8_ppgtt_clear_pdp(vm, pdp, start, length))
940                         continue;
941
942                 gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
943
944                 free_pdp(vm, pdp);
945         }
946 }
947
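/*
 * Cursor over a VMA's scatterlist for PTE insertion: tracks the current
 * segment, the next DMA address to write and the end of that segment.
 */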
948 static inline struct sgt_dma {
949         struct scatterlist *sg;
950         dma_addr_t dma, max;
951 } sgt_dma(struct i915_vma *vma) {
952         struct scatterlist *sg = vma->pages->sgl;
953         dma_addr_t addr = sg_dma_address(sg);
954         return (struct sgt_dma) { sg, addr, addr + sg->length };
955 }
956
957 struct gen8_insert_pte {
958         u16 pml4e;
959         u16 pdpe;
960         u16 pde;
961         u16 pte;
962 };
963
964 static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start)
965 {
966         return (struct gen8_insert_pte) {
967                  gen8_pml4e_index(start),
968                  gen8_pdpe_index(start),
969                  gen8_pde_index(start),
970                  gen8_pte_index(start),
971         };
972 }
973
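/*
 * Write PTEs for the pages in @iter into the (already allocated) page tables
 * under @pdp, starting at @idx. Returns true if the walk ran off the end of
 * this pdp with pages still remaining (the 4lvl caller then continues with
 * the next pdp), or false once the scatterlist has been exhausted.
 */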
974 static __always_inline bool
975 gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt,
976                               struct i915_page_directory_pointer *pdp,
977                               struct sgt_dma *iter,
978                               struct gen8_insert_pte *idx,
979                               enum i915_cache_level cache_level,
980                               u32 flags)
981 {
982         struct i915_page_directory *pd;
983         const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
984         gen8_pte_t *vaddr;
985         bool ret;
986
987         GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
988         pd = pdp->page_directory[idx->pdpe];
989         vaddr = kmap_atomic_px(pd->page_table[idx->pde]);
990         do {
991                 vaddr[idx->pte] = pte_encode | iter->dma;
992
993                 iter->dma += I915_GTT_PAGE_SIZE;
994                 if (iter->dma >= iter->max) {
995                         iter->sg = __sg_next(iter->sg);
996                         if (!iter->sg) {
997                                 ret = false;
998                                 break;
999                         }
1000
1001                         iter->dma = sg_dma_address(iter->sg);
1002                         iter->max = iter->dma + iter->sg->length;
1003                 }
1004
1005                 if (++idx->pte == GEN8_PTES) {
1006                         idx->pte = 0;
1007
1008                         if (++idx->pde == I915_PDES) {
1009                                 idx->pde = 0;
1010
1011                                 /* Limited by sg length for 3lvl */
1012                                 if (++idx->pdpe == GEN8_PML4ES_PER_PML4) {
1013                                         idx->pdpe = 0;
1014                                         ret = true;
1015                                         break;
1016                                 }
1017
1018                                 GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
1019                                 pd = pdp->page_directory[idx->pdpe];
1020                         }
1021
1022                         kunmap_atomic(vaddr);
1023                         vaddr = kmap_atomic_px(pd->page_table[idx->pde]);
1024                 }
1025         } while (1);
1026         kunmap_atomic(vaddr);
1027
1028         return ret;
1029 }
1030
1031 static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
1032                                    struct i915_vma *vma,
1033                                    enum i915_cache_level cache_level,
1034                                    u32 flags)
1035 {
1036         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1037         struct sgt_dma iter = sgt_dma(vma);
1038         struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
1039
1040         gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, &idx,
1041                                       cache_level, flags);
1042
1043         vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
1044 }
1045
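/*
 * Insert entries using the largest page size the backing store allows:
 * 2M PDEs when the DMA addresses and remaining length are suitably aligned,
 * 64K PTE mode for aligned 64K chunks, otherwise ordinary 4K PTEs.
 */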
1046 static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
1047                                            struct i915_page_directory_pointer **pdps,
1048                                            struct sgt_dma *iter,
1049                                            enum i915_cache_level cache_level,
1050                                            u32 flags)
1051 {
1052         const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
1053         u64 start = vma->node.start;
1054         dma_addr_t rem = iter->sg->length;
1055
1056         do {
1057                 struct gen8_insert_pte idx = gen8_insert_pte(start);
1058                 struct i915_page_directory_pointer *pdp = pdps[idx.pml4e];
1059                 struct i915_page_directory *pd = pdp->page_directory[idx.pdpe];
1060                 unsigned int page_size;
1061                 bool maybe_64K = false;
1062                 gen8_pte_t encode = pte_encode;
1063                 gen8_pte_t *vaddr;
1064                 u16 index, max;
1065
1066                 if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
1067                     IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
1068                     rem >= I915_GTT_PAGE_SIZE_2M && !idx.pte) {
1069                         index = idx.pde;
1070                         max = I915_PDES;
1071                         page_size = I915_GTT_PAGE_SIZE_2M;
1072
1073                         encode |= GEN8_PDE_PS_2M;
1074
1075                         vaddr = kmap_atomic_px(pd);
1076                 } else {
1077                         struct i915_page_table *pt = pd->page_table[idx.pde];
1078
1079                         index = idx.pte;
1080                         max = GEN8_PTES;
1081                         page_size = I915_GTT_PAGE_SIZE;
1082
1083                         if (!index &&
1084                             vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
1085                             IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
1086                             (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
1087                              rem >= (max - index) * I915_GTT_PAGE_SIZE))
1088                                 maybe_64K = true;
1089
1090                         vaddr = kmap_atomic_px(pt);
1091                 }
1092
1093                 do {
1094                         GEM_BUG_ON(iter->sg->length < page_size);
1095                         vaddr[index++] = encode | iter->dma;
1096
1097                         start += page_size;
1098                         iter->dma += page_size;
1099                         rem -= page_size;
1100                         if (iter->dma >= iter->max) {
1101                                 iter->sg = __sg_next(iter->sg);
1102                                 if (!iter->sg)
1103                                         break;
1104
1105                                 rem = iter->sg->length;
1106                                 iter->dma = sg_dma_address(iter->sg);
1107                                 iter->max = iter->dma + rem;
1108
1109                                 if (maybe_64K && index < max &&
1110                                     !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
1111                                       (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
1112                                        rem >= (max - index) * I915_GTT_PAGE_SIZE)))
1113                                         maybe_64K = false;
1114
1115                                 if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
1116                                         break;
1117                         }
1118                 } while (rem >= page_size && index < max);
1119
1120                 kunmap_atomic(vaddr);
1121
1122                 /*
1123                  * Is it safe to mark the 2M block as 64K? -- Either we have
1124                  * filled the whole page-table with 64K entries, or filled part of
1125                  * it and have reached the end of the sg table and we have
1126                  * enough padding.
1127                  */
1128                 if (maybe_64K &&
1129                     (index == max ||
1130                      (i915_vm_has_scratch_64K(vma->vm) &&
1131                       !iter->sg && IS_ALIGNED(vma->node.start +
1132                                               vma->node.size,
1133                                               I915_GTT_PAGE_SIZE_2M)))) {
1134                         vaddr = kmap_atomic_px(pd);
1135                         vaddr[idx.pde] |= GEN8_PDE_IPS_64K;
1136                         kunmap_atomic(vaddr);
1137                         page_size = I915_GTT_PAGE_SIZE_64K;
1138
1139                         /*
1140                          * We write all 4K page entries, even when using 64K
1141                          * pages. In order to verify that the HW isn't cheating
1142                          * by using the 4K PTE instead of the 64K PTE, we want
1143                          * to remove all the surplus entries. If the HW skipped
1144                          * the 64K PTE, it will read/write into the scratch page
1145                          * instead - which we detect as missing results during
1146                          * selftests.
1147                          */
1148                         if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) {
1149                                 u16 i;
1150
1151                                 encode = vma->vm->scratch_pte;
1152                                 vaddr = kmap_atomic_px(pd->page_table[idx.pde]);
1153
1154                                 for (i = 1; i < index; i += 16)
1155                                         memset64(vaddr + i, encode, 15);
1156
1157                                 kunmap_atomic(vaddr);
1158                         }
1159                 }
1160
1161                 vma->page_sizes.gtt |= page_size;
1162         } while (iter->sg);
1163 }
1164
1165 static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
1166                                    struct i915_vma *vma,
1167                                    enum i915_cache_level cache_level,
1168                                    u32 flags)
1169 {
1170         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1171         struct sgt_dma iter = sgt_dma(vma);
1172         struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps;
1173
1174         if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) {
1175                 gen8_ppgtt_insert_huge_entries(vma, pdps, &iter, cache_level,
1176                                                flags);
1177         } else {
1178                 struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
1179
1180                 while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++],
1181                                                      &iter, &idx, cache_level,
1182                                                      flags))
1183                         GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4);
1184
1185                 vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
1186         }
1187 }
1188
1189 static void gen8_free_page_tables(struct i915_address_space *vm,
1190                                   struct i915_page_directory *pd)
1191 {
1192         int i;
1193
1194         for (i = 0; i < I915_PDES; i++) {
1195                 if (pd->page_table[i] != vm->scratch_pt)
1196                         free_pt(vm, pd->page_table[i]);
1197         }
1198 }
1199
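/*
 * Set up the scratch hierarchy (scratch page, pt, pd and, on 4lvl, pdp) that
 * unallocated ranges of the address space point at. When the scratch page is
 * mapped read-only it can be shared, so we clone the kernel context's scratch
 * instead of allocating a fresh set.
 */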
1200 static int gen8_init_scratch(struct i915_address_space *vm)
1201 {
1202         int ret;
1203
1204         /*
1205          * If everybody agrees not to write into the scratch page,
1206          * we can reuse it for all vm, keeping contexts and processes separate.
1207          */
1208         if (vm->has_read_only &&
1209             vm->i915->kernel_context &&
1210             vm->i915->kernel_context->ppgtt) {
1211                 struct i915_address_space *clone =
1212                         &vm->i915->kernel_context->ppgtt->vm;
1213
1214                 GEM_BUG_ON(!clone->has_read_only);
1215
1216                 vm->scratch_order = clone->scratch_order;
1217                 vm->scratch_pte = clone->scratch_pte;
1218                 vm->scratch_pt  = clone->scratch_pt;
1219                 vm->scratch_pd  = clone->scratch_pd;
1220                 vm->scratch_pdp = clone->scratch_pdp;
1221                 return 0;
1222         }
1223
1224         ret = setup_scratch_page(vm, __GFP_HIGHMEM);
1225         if (ret)
1226                 return ret;
1227
1228         vm->scratch_pte =
1229                 gen8_pte_encode(vm->scratch_page.daddr,
1230                                 I915_CACHE_LLC,
1231                                 vm->has_read_only);
1232
1233         vm->scratch_pt = alloc_pt(vm);
1234         if (IS_ERR(vm->scratch_pt)) {
1235                 ret = PTR_ERR(vm->scratch_pt);
1236                 goto free_scratch_page;
1237         }
1238
1239         vm->scratch_pd = alloc_pd(vm);
1240         if (IS_ERR(vm->scratch_pd)) {
1241                 ret = PTR_ERR(vm->scratch_pd);
1242                 goto free_pt;
1243         }
1244
1245         if (i915_vm_is_4lvl(vm)) {
1246                 vm->scratch_pdp = alloc_pdp(vm);
1247                 if (IS_ERR(vm->scratch_pdp)) {
1248                         ret = PTR_ERR(vm->scratch_pdp);
1249                         goto free_pd;
1250                 }
1251         }
1252
1253         gen8_initialize_pt(vm, vm->scratch_pt);
1254         gen8_initialize_pd(vm, vm->scratch_pd);
1255         if (i915_vm_is_4lvl(vm))
1256                 gen8_initialize_pdp(vm, vm->scratch_pdp);
1257
1258         return 0;
1259
1260 free_pd:
1261         free_pd(vm, vm->scratch_pd);
1262 free_pt:
1263         free_pt(vm, vm->scratch_pt);
1264 free_scratch_page:
1265         cleanup_scratch_page(vm);
1266
1267         return ret;
1268 }
1269
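/*
 * When running under GVT-g, notify the host of PPGTT creation/destruction by
 * writing the page-directory addresses and a g2v message to the vgtif
 * mailbox registers.
 */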
1270 static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
1271 {
1272         struct i915_address_space *vm = &ppgtt->vm;
1273         struct drm_i915_private *dev_priv = vm->i915;
1274         enum vgt_g2v_type msg;
1275         int i;
1276
1277         if (i915_vm_is_4lvl(vm)) {
1278                 const u64 daddr = px_dma(&ppgtt->pml4);
1279
1280                 I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
1281                 I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
1282
1283                 msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
1284                                 VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
1285         } else {
1286                 for (i = 0; i < GEN8_3LVL_PDPES; i++) {
1287                         const u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
1288
1289                         I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
1290                         I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
1291                 }
1292
1293                 msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
1294                                 VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
1295         }
1296
1297         I915_WRITE(vgtif_reg(g2v_notify), msg);
1298
1299         return 0;
1300 }
1301
1302 static void gen8_free_scratch(struct i915_address_space *vm)
1303 {
1304         if (!vm->scratch_page.daddr)
1305                 return;
1306
1307         if (i915_vm_is_4lvl(vm))
1308                 free_pdp(vm, vm->scratch_pdp);
1309         free_pd(vm, vm->scratch_pd);
1310         free_pt(vm, vm->scratch_pt);
1311         cleanup_scratch_page(vm);
1312 }
1313
1314 static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
1315                                     struct i915_page_directory_pointer *pdp)
1316 {
1317         const unsigned int pdpes = i915_pdpes_per_pdp(vm);
1318         int i;
1319
1320         for (i = 0; i < pdpes; i++) {
1321                 if (pdp->page_directory[i] == vm->scratch_pd)
1322                         continue;
1323
1324                 gen8_free_page_tables(vm, pdp->page_directory[i]);
1325                 free_pd(vm, pdp->page_directory[i]);
1326         }
1327
1328         free_pdp(vm, pdp);
1329 }
1330
1331 static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
1332 {
1333         int i;
1334
1335         for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) {
1336                 if (ppgtt->pml4.pdps[i] == ppgtt->vm.scratch_pdp)
1337                         continue;
1338
1339                 gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, ppgtt->pml4.pdps[i]);
1340         }
1341
1342         cleanup_px(&ppgtt->vm, &ppgtt->pml4);
1343 }
1344
1345 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
1346 {
1347         struct drm_i915_private *dev_priv = vm->i915;
1348         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1349
1350         if (intel_vgpu_active(dev_priv))
1351                 gen8_ppgtt_notify_vgt(ppgtt, false);
1352
1353         if (i915_vm_is_4lvl(vm))
1354                 gen8_ppgtt_cleanup_4lvl(ppgtt);
1355         else
1356                 gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, &ppgtt->pdp);
1357
1358         gen8_free_scratch(vm);
1359 }
1360
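/*
 * Ensure page tables exist for [start, start + length) within @pd, allocating
 * and hooking up new tables as needed and accounting the PTEs that will be
 * used. On allocation failure, the partially allocated range is unwound via
 * gen8_ppgtt_clear_pd().
 */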
1361 static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
1362                                struct i915_page_directory *pd,
1363                                u64 start, u64 length)
1364 {
1365         struct i915_page_table *pt;
1366         u64 from = start;
1367         unsigned int pde;
1368
1369         gen8_for_each_pde(pt, pd, start, length, pde) {
1370                 int count = gen8_pte_count(start, length);
1371
1372                 if (pt == vm->scratch_pt) {
1373                         pd->used_pdes++;
1374
1375                         pt = alloc_pt(vm);
1376                         if (IS_ERR(pt)) {
1377                                 pd->used_pdes--;
1378                                 goto unwind;
1379                         }
1380
1381                         if (count < GEN8_PTES || intel_vgpu_active(vm->i915))
1382                                 gen8_initialize_pt(vm, pt);
1383
1384                         gen8_ppgtt_set_pde(vm, pd, pt, pde);
1385                         GEM_BUG_ON(pd->used_pdes > I915_PDES);
1386                 }
1387
1388                 pt->used_ptes += count;
1389         }
1390         return 0;
1391
1392 unwind:
1393         gen8_ppgtt_clear_pd(vm, pd, from, start - from);
1394         return -ENOMEM;
1395 }
1396
1397 static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
1398                                 struct i915_page_directory_pointer *pdp,
1399                                 u64 start, u64 length)
1400 {
1401         struct i915_page_directory *pd;
1402         u64 from = start;
1403         unsigned int pdpe;
1404         int ret;
1405
1406         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1407                 if (pd == vm->scratch_pd) {
1408                         pdp->used_pdpes++;
1409
1410                         pd = alloc_pd(vm);
1411                         if (IS_ERR(pd)) {
1412                                 pdp->used_pdpes--;
1413                                 goto unwind;
1414                         }
1415
1416                         gen8_initialize_pd(vm, pd);
1417                         gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
1418                         GEM_BUG_ON(pdp->used_pdpes > i915_pdpes_per_pdp(vm));
1419                 }
1420
1421                 ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
1422                 if (unlikely(ret))
1423                         goto unwind_pd;
1424         }
1425
1426         return 0;
1427
1428 unwind_pd:
1429         if (!pd->used_pdes) {
1430                 gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
1431                 GEM_BUG_ON(!pdp->used_pdpes);
1432                 pdp->used_pdpes--;
1433                 free_pd(vm, pd);
1434         }
1435 unwind:
1436         gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
1437         return -ENOMEM;
1438 }
1439
1440 static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm,
1441                                  u64 start, u64 length)
1442 {
1443         return gen8_ppgtt_alloc_pdp(vm,
1444                                     &i915_vm_to_ppgtt(vm)->pdp, start, length);
1445 }
1446
1447 static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
1448                                  u64 start, u64 length)
1449 {
1450         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1451         struct i915_pml4 *pml4 = &ppgtt->pml4;
1452         struct i915_page_directory_pointer *pdp;
1453         u64 from = start;
1454         u32 pml4e;
1455         int ret;
1456
1457         gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1458                 if (pml4->pdps[pml4e] == vm->scratch_pdp) {
1459                         pdp = alloc_pdp(vm);
1460                         if (IS_ERR(pdp))
1461                                 goto unwind;
1462
1463                         gen8_initialize_pdp(vm, pdp);
1464                         gen8_ppgtt_set_pml4e(pml4, pdp, pml4e);
1465                 }
1466
1467                 ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length);
1468                 if (unlikely(ret))
1469                         goto unwind_pdp;
1470         }
1471
1472         return 0;
1473
1474 unwind_pdp:
1475         if (!pdp->used_pdpes) {
1476                 gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
1477                 free_pdp(vm, pdp);
1478         }
1479 unwind:
1480         gen8_ppgtt_clear_4lvl(vm, from, start - from);
1481         return -ENOMEM;
1482 }
1483
1484 static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt)
1485 {
1486         struct i915_address_space *vm = &ppgtt->vm;
1487         struct i915_page_directory_pointer *pdp = &ppgtt->pdp;
1488         struct i915_page_directory *pd;
1489         u64 start = 0, length = ppgtt->vm.total;
1490         u64 from = start;
1491         unsigned int pdpe;
1492
1493         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1494                 pd = alloc_pd(vm);
1495                 if (IS_ERR(pd))
1496                         goto unwind;
1497
1498                 gen8_initialize_pd(vm, pd);
1499                 gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
1500                 pdp->used_pdpes++;
1501         }
1502
1503         pdp->used_pdpes++; /* never remove */
1504         return 0;
1505
1506 unwind:
1507         start -= from;
1508         gen8_for_each_pdpe(pd, pdp, from, start, pdpe) {
1509                 gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
1510                 free_pd(vm, pd);
1511         }
1512         pdp->used_pdpes = 0;
1513         return -ENOMEM;
1514 }
1515
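/*
 * Common PPGTT setup: reference count, backing DMA device, total VA size
 * taken from the platform's ppgtt_size, and the per-VMA bind/unbind hooks.
 */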
1516 static void ppgtt_init(struct drm_i915_private *i915,
1517                        struct i915_hw_ppgtt *ppgtt)
1518 {
1519         kref_init(&ppgtt->ref);
1520
1521         ppgtt->vm.i915 = i915;
1522         ppgtt->vm.dma = &i915->drm.pdev->dev;
1523         ppgtt->vm.total = BIT_ULL(INTEL_INFO(i915)->ppgtt_size);
1524
1525         i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);
1526
1527         ppgtt->vm.vma_ops.bind_vma    = ppgtt_bind_vma;
1528         ppgtt->vm.vma_ops.unbind_vma  = ppgtt_unbind_vma;
1529         ppgtt->vm.vma_ops.set_pages   = ppgtt_set_pages;
1530         ppgtt->vm.vma_ops.clear_pages = clear_pages;
1531 }
1532
1533 /*
1534  * GEN8 legacy ppgtt programming is accomplished through a max of 4 PDP registers
1535  * with a net effect resembling a 2-level page table in normal x86 terms. Each
1536  * PDP represents 1GB of memory; 4 * 512 * 512 * 4096 = 4GB of legacy 32b address
1537  * space.
1538  *
1539  */
1540 static struct i915_hw_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
1541 {
1542         struct i915_hw_ppgtt *ppgtt;
1543         int err;
1544
1545         ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
1546         if (!ppgtt)
1547                 return ERR_PTR(-ENOMEM);
1548
1549         ppgtt_init(i915, ppgtt);
1550
1551         /*
1552          * From bdw, there is hw support for read-only pages in the PPGTT.
1553          *
1554          * Gen11 has HSDES#:1807136187 unresolved, so disable read-only
1555          * support for now.
1556          */
1557         ppgtt->vm.has_read_only = INTEL_GEN(i915) != 11;
1558
1559         /* There are only a few exceptions for gen >= 6: chv and bxt.
1560          * And we are not sure about the latter, so play safe for now.
1561          */
1562         if (IS_CHERRYVIEW(i915) || IS_BROXTON(i915))
1563                 ppgtt->vm.pt_kmap_wc = true;
1564
1565         err = gen8_init_scratch(&ppgtt->vm);
1566         if (err)
1567                 goto err_free;
1568
1569         if (i915_vm_is_4lvl(&ppgtt->vm)) {
1570                 err = setup_px(&ppgtt->vm, &ppgtt->pml4);
1571                 if (err)
1572                         goto err_scratch;
1573
1574                 gen8_initialize_pml4(&ppgtt->vm, &ppgtt->pml4);
1575
1576                 ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_4lvl;
1577                 ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl;
1578                 ppgtt->vm.clear_range = gen8_ppgtt_clear_4lvl;
1579         } else {
1580                 err = __pdp_init(&ppgtt->vm, &ppgtt->pdp);
1581                 if (err)
1582                         goto err_scratch;
1583
1584                 if (intel_vgpu_active(i915)) {
1585                         err = gen8_preallocate_top_level_pdp(ppgtt);
1586                         if (err) {
1587                                 __pdp_fini(&ppgtt->pdp);
1588                                 goto err_scratch;
1589                         }
1590                 }
1591
1592                 ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_3lvl;
1593                 ppgtt->vm.insert_entries = gen8_ppgtt_insert_3lvl;
1594                 ppgtt->vm.clear_range = gen8_ppgtt_clear_3lvl;
1595         }
1596
1597         if (intel_vgpu_active(i915))
1598                 gen8_ppgtt_notify_vgt(ppgtt, true);
1599
1600         ppgtt->vm.cleanup = gen8_ppgtt_cleanup;
1601
1602         return ppgtt;
1603
1604 err_scratch:
1605         gen8_free_scratch(&ppgtt->vm);
1606 err_free:
1607         kfree(ppgtt);
1608         return ERR_PTR(err);
1609 }
1610
1611 /* Write the PDE at index @pde so that it points at the page table @pt */
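/*
 * Callers that update live PDEs (see gen6_alloc_va_range() and pd_vma_bind()
 * below) follow the writes with mark_tlbs_dirty() and gen6_ggtt_invalidate()
 * so that the GPU observes the new entries.
 */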
1612 static inline void gen6_write_pde(const struct gen6_hw_ppgtt *ppgtt,
1613                                   const unsigned int pde,
1614                                   const struct i915_page_table *pt)
1615 {
1616         /* Caller needs to make sure the write completes if necessary */
1617         iowrite32(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID,
1618                   ppgtt->pd_addr + pde);
1619 }
1620
1621 static void gen7_ppgtt_enable(struct drm_i915_private *dev_priv)
1622 {
1623         struct intel_engine_cs *engine;
1624         u32 ecochk, ecobits;
1625         enum intel_engine_id id;
1626
1627         ecobits = I915_READ(GAC_ECO_BITS);
1628         I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
1629
1630         ecochk = I915_READ(GAM_ECOCHK);
1631         if (IS_HASWELL(dev_priv)) {
1632                 ecochk |= ECOCHK_PPGTT_WB_HSW;
1633         } else {
1634                 ecochk |= ECOCHK_PPGTT_LLC_IVB;
1635                 ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
1636         }
1637         I915_WRITE(GAM_ECOCHK, ecochk);
1638
1639         for_each_engine(engine, dev_priv, id) {
1640                 /* GFX_MODE is per-ring on gen7+ */
1641                 I915_WRITE(RING_MODE_GEN7(engine),
1642                            _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1643         }
1644 }
1645
1646 static void gen6_ppgtt_enable(struct drm_i915_private *dev_priv)
1647 {
1648         u32 ecochk, gab_ctl, ecobits;
1649
1650         ecobits = I915_READ(GAC_ECO_BITS);
1651         I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
1652                    ECOBITS_PPGTT_CACHE64B);
1653
1654         gab_ctl = I915_READ(GAB_CTL);
1655         I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
1656
1657         ecochk = I915_READ(GAM_ECOCHK);
1658         I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
1659
1660         if (HAS_PPGTT(dev_priv)) /* may be disabled for VT-d */
1661                 I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1662 }
1663
1664 /* PPGTT support for Sandybridge/Gen6 and later */
1665 static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
1666                                    u64 start, u64 length)
1667 {
1668         struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
1669         unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
1670         unsigned int pde = first_entry / GEN6_PTES;
1671         unsigned int pte = first_entry % GEN6_PTES;
1672         unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
1673         const gen6_pte_t scratch_pte = vm->scratch_pte;
1674
1675         while (num_entries) {
1676                 struct i915_page_table *pt = ppgtt->base.pd.page_table[pde++];
1677                 const unsigned int count = min(num_entries, GEN6_PTES - pte);
1678                 gen6_pte_t *vaddr;
1679
1680                 GEM_BUG_ON(pt == vm->scratch_pt);
1681
1682                 num_entries -= count;
1683
1684                 GEM_BUG_ON(count > pt->used_ptes);
1685                 pt->used_ptes -= count;
1686                 if (!pt->used_ptes)
1687                         ppgtt->scan_for_unused_pt = true;
1688
1689                 /*
1690                  * Note that the hw doesn't support removing PDEs on the fly
1691                  * (they are cached inside the context with no means to
1692                  * invalidate the cache), so we can only reset the PTE
1693                  * entries back to scratch.
1694                  */
1695
1696                 vaddr = kmap_atomic_px(pt);
1697                 memset32(vaddr + pte, scratch_pte, count);
1698                 kunmap_atomic(vaddr);
1699
1700                 pte = 0;
1701         }
1702 }
1703
1704 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
1705                                       struct i915_vma *vma,
1706                                       enum i915_cache_level cache_level,
1707                                       u32 flags)
1708 {
1709         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1710         unsigned first_entry = vma->node.start / I915_GTT_PAGE_SIZE;
1711         unsigned act_pt = first_entry / GEN6_PTES;
1712         unsigned act_pte = first_entry % GEN6_PTES;
1713         const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
1714         struct sgt_dma iter = sgt_dma(vma);
1715         gen6_pte_t *vaddr;
1716
1717         GEM_BUG_ON(ppgtt->pd.page_table[act_pt] == vm->scratch_pt);
1718
1719         vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]);
1720         do {
1721                 vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);
1722
1723                 iter.dma += I915_GTT_PAGE_SIZE;
1724                 if (iter.dma == iter.max) {
1725                         iter.sg = __sg_next(iter.sg);
1726                         if (!iter.sg)
1727                                 break;
1728
1729                         iter.dma = sg_dma_address(iter.sg);
1730                         iter.max = iter.dma + iter.sg->length;
1731                 }
1732
1733                 if (++act_pte == GEN6_PTES) {
1734                         kunmap_atomic(vaddr);
1735                         vaddr = kmap_atomic_px(ppgtt->pd.page_table[++act_pt]);
1736                         act_pte = 0;
1737                 }
1738         } while (1);
1739         kunmap_atomic(vaddr);
1740
1741         vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
1742 }
1743
1744 static int gen6_alloc_va_range(struct i915_address_space *vm,
1745                                u64 start, u64 length)
1746 {
1747         struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
1748         struct i915_page_table *pt;
1749         u64 from = start;
1750         unsigned int pde;
1751         bool flush = false;
1752
1753         gen6_for_each_pde(pt, &ppgtt->base.pd, start, length, pde) {
1754                 const unsigned int count = gen6_pte_count(start, length);
1755
1756                 if (pt == vm->scratch_pt) {
1757                         pt = alloc_pt(vm);
1758                         if (IS_ERR(pt))
1759                                 goto unwind_out;
1760
1761                         gen6_initialize_pt(vm, pt);
1762                         ppgtt->base.pd.page_table[pde] = pt;
1763
1764                         if (i915_vma_is_bound(ppgtt->vma,
1765                                               I915_VMA_GLOBAL_BIND)) {
1766                                 gen6_write_pde(ppgtt, pde, pt);
1767                                 flush = true;
1768                         }
1769
1770                         GEM_BUG_ON(pt->used_ptes);
1771                 }
1772
1773                 pt->used_ptes += count;
1774         }
1775
1776         if (flush) {
1777                 mark_tlbs_dirty(&ppgtt->base);
1778                 gen6_ggtt_invalidate(ppgtt->base.vm.i915);
1779         }
1780
1781         return 0;
1782
1783 unwind_out:
1784         gen6_ppgtt_clear_range(vm, from, start - from);
1785         return -ENOMEM;
1786 }
1787
1788 static int gen6_ppgtt_init_scratch(struct gen6_hw_ppgtt *ppgtt)
1789 {
1790         struct i915_address_space * const vm = &ppgtt->base.vm;
1791         struct i915_page_table *unused;
1792         u32 pde;
1793         int ret;
1794
1795         ret = setup_scratch_page(vm, __GFP_HIGHMEM);
1796         if (ret)
1797                 return ret;
1798
1799         vm->scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
1800                                          I915_CACHE_NONE,
1801                                          PTE_READ_ONLY);
1802
1803         vm->scratch_pt = alloc_pt(vm);
1804         if (IS_ERR(vm->scratch_pt)) {
1805                 cleanup_scratch_page(vm);
1806                 return PTR_ERR(vm->scratch_pt);
1807         }
1808
1809         gen6_initialize_pt(vm, vm->scratch_pt);
1810         gen6_for_all_pdes(unused, &ppgtt->base.pd, pde)
1811                 ppgtt->base.pd.page_table[pde] = vm->scratch_pt;
1812
1813         return 0;
1814 }
1815
1816 static void gen6_ppgtt_free_scratch(struct i915_address_space *vm)
1817 {
1818         free_pt(vm, vm->scratch_pt);
1819         cleanup_scratch_page(vm);
1820 }
1821
1822 static void gen6_ppgtt_free_pd(struct gen6_hw_ppgtt *ppgtt)
1823 {
1824         struct i915_page_table *pt;
1825         u32 pde;
1826
1827         gen6_for_all_pdes(pt, &ppgtt->base.pd, pde)
1828                 if (pt != ppgtt->base.vm.scratch_pt)
1829                         free_pt(&ppgtt->base.vm, pt);
1830 }
1831
1832 static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
1833 {
1834         struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
1835
1836         i915_vma_destroy(ppgtt->vma);
1837
1838         gen6_ppgtt_free_pd(ppgtt);
1839         gen6_ppgtt_free_scratch(vm);
1840 }
1841
1842 static int pd_vma_set_pages(struct i915_vma *vma)
1843 {
1844         vma->pages = ERR_PTR(-ENODEV);
1845         return 0;
1846 }
1847
1848 static void pd_vma_clear_pages(struct i915_vma *vma)
1849 {
1850         GEM_BUG_ON(!vma->pages);
1851
1852         vma->pages = NULL;
1853 }
1854
1855 static int pd_vma_bind(struct i915_vma *vma,
1856                        enum i915_cache_level cache_level,
1857                        u32 unused)
1858 {
1859         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
1860         struct gen6_hw_ppgtt *ppgtt = vma->private;
1861         u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE;
1862         struct i915_page_table *pt;
1863         unsigned int pde;
1864
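        /*
         * Each GGTT page has exactly one PTE slot in the gsm, so the vma's
         * offset in pages doubles as the index of the first gsm slot occupied
         * by this page directory; the PDEs are written into those slots.
         */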
1865         ppgtt->base.pd.base.ggtt_offset = ggtt_offset * sizeof(gen6_pte_t);
1866         ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;
1867
1868         gen6_for_all_pdes(pt, &ppgtt->base.pd, pde)
1869                 gen6_write_pde(ppgtt, pde, pt);
1870
1871         mark_tlbs_dirty(&ppgtt->base);
1872         gen6_ggtt_invalidate(ppgtt->base.vm.i915);
1873
1874         return 0;
1875 }
1876
1877 static void pd_vma_unbind(struct i915_vma *vma)
1878 {
1879         struct gen6_hw_ppgtt *ppgtt = vma->private;
1880         struct i915_page_table * const scratch_pt = ppgtt->base.vm.scratch_pt;
1881         struct i915_page_table *pt;
1882         unsigned int pde;
1883
1884         if (!ppgtt->scan_for_unused_pt)
1885                 return;
1886
1887         /* Free all no longer used page tables */
1888         gen6_for_all_pdes(pt, &ppgtt->base.pd, pde) {
1889                 if (pt->used_ptes || pt == scratch_pt)
1890                         continue;
1891
1892                 free_pt(&ppgtt->base.vm, pt);
1893                 ppgtt->base.pd.page_table[pde] = scratch_pt;
1894         }
1895
1896         ppgtt->scan_for_unused_pt = false;
1897 }
1898
1899 static const struct i915_vma_ops pd_vma_ops = {
1900         .set_pages = pd_vma_set_pages,
1901         .clear_pages = pd_vma_clear_pages,
1902         .bind_vma = pd_vma_bind,
1903         .unbind_vma = pd_vma_unbind,
1904 };
1905
1906 static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
1907 {
1908         struct drm_i915_private *i915 = ppgtt->base.vm.i915;
1909         struct i915_ggtt *ggtt = &i915->ggtt;
1910         struct i915_vma *vma;
1911
1912         GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
1913         GEM_BUG_ON(size > ggtt->vm.total);
1914
1915         vma = i915_vma_alloc();
1916         if (!vma)
1917                 return ERR_PTR(-ENOMEM);
1918
1919         i915_active_init(i915, &vma->active, NULL);
1920         INIT_ACTIVE_REQUEST(&vma->last_fence);
1921
1922         vma->vm = &ggtt->vm;
1923         vma->ops = &pd_vma_ops;
1924         vma->private = ppgtt;
1925
1926         vma->size = size;
1927         vma->fence_size = size;
1928         vma->flags = I915_VMA_GGTT;
1929         vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */
1930
1931         INIT_LIST_HEAD(&vma->obj_link);
1932
1933         mutex_lock(&vma->vm->mutex);
1934         list_add(&vma->vm_link, &vma->vm->unbound_list);
1935         mutex_unlock(&vma->vm->mutex);
1936
1937         return vma;
1938 }
1939
1940 int gen6_ppgtt_pin(struct i915_hw_ppgtt *base)
1941 {
1942         struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
1943         int err;
1944
1945         GEM_BUG_ON(ppgtt->base.vm.closed);
1946
1947         /*
1948          * Work around the limited maximum vma->pin_count and the fact that
1949          * the aliasing_ppgtt is pinned into every active context.
1950          * (When vma->pin_count becomes atomic, I expect we will naturally
1951          * need a larger, unpacked, type and kill this redundancy.)
1952          */
1953         if (ppgtt->pin_count++)
1954                 return 0;
1955
1956         /*
1957          * PPGTT PDEs reside in the GGTT and consist of 512 entries. The
1958          * allocator works in address space sizes, so it's multiplied by page
1959          * size. We allocate at the top of the GTT to avoid fragmentation.
1960          */
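        /*
         * Concretely, the 512 PDEs occupy 512 consecutive gsm slots, i.e.
         * they shadow 512 * I915_GTT_PAGE_SIZE = 2 MiB of GGTT address space,
         * which is the GEN6_PD_SIZE passed to pd_vma_create().
         */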
1961         err = i915_vma_pin(ppgtt->vma,
1962                            0, GEN6_PD_ALIGN,
1963                            PIN_GLOBAL | PIN_HIGH);
1964         if (err)
1965                 goto unpin;
1966
1967         return 0;
1968
1969 unpin:
1970         ppgtt->pin_count = 0;
1971         return err;
1972 }
1973
1974 void gen6_ppgtt_unpin(struct i915_hw_ppgtt *base)
1975 {
1976         struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
1977
1978         GEM_BUG_ON(!ppgtt->pin_count);
1979         if (--ppgtt->pin_count)
1980                 return;
1981
1982         i915_vma_unpin(ppgtt->vma);
1983 }
1984
1985 void gen6_ppgtt_unpin_all(struct i915_hw_ppgtt *base)
1986 {
1987         struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
1988
1989         if (!ppgtt->pin_count)
1990                 return;
1991
1992         ppgtt->pin_count = 0;
1993         i915_vma_unpin(ppgtt->vma);
1994 }
1995
1996 static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
1997 {
1998         struct i915_ggtt * const ggtt = &i915->ggtt;
1999         struct gen6_hw_ppgtt *ppgtt;
2000         int err;
2001
2002         ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2003         if (!ppgtt)
2004                 return ERR_PTR(-ENOMEM);
2005
2006         ppgtt_init(i915, &ppgtt->base);
2007
2008         ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
2009         ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
2010         ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
2011         ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup;
2012
2013         ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;
2014
2015         err = gen6_ppgtt_init_scratch(ppgtt);
2016         if (err)
2017                 goto err_free;
2018
2019         ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE);
2020         if (IS_ERR(ppgtt->vma)) {
2021                 err = PTR_ERR(ppgtt->vma);
2022                 goto err_scratch;
2023         }
2024
2025         return &ppgtt->base;
2026
2027 err_scratch:
2028         gen6_ppgtt_free_scratch(&ppgtt->base.vm);
2029 err_free:
2030         kfree(ppgtt);
2031         return ERR_PTR(err);
2032 }
2033
2034 static void gtt_write_workarounds(struct drm_i915_private *dev_priv)
2035 {
2036         /* This function is for GTT-related workarounds. It is called on
2037          * driver load and after a GPU reset, so you can place workarounds
2038          * here even if they get overwritten by a GPU reset.
2039          */
2040         /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
2041         if (IS_BROADWELL(dev_priv))
2042                 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
2043         else if (IS_CHERRYVIEW(dev_priv))
2044                 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
2045         else if (IS_GEN9_LP(dev_priv))
2046                 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
2047         else if (INTEL_GEN(dev_priv) >= 9)
2048                 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
2049
2050         /*
2051          * To support 64K PTEs we need to first enable the use of the
2052          * Intermediate-Page-Size (IPS) bit of the PDE field via some magical
2053          * mmio, otherwise the page-walker will simply ignore the IPS bit. This
2054          * shouldn't be needed after GEN10.
2055          *
2056          * 64K pages were first introduced with BDW, although technically they
2057          * only *work* from gen9+. For pre-BDW we instead have the option for
2058          * 32K pages, but we don't currently have any support for it in our
2059          * driver.
2060          */
2061         if (HAS_PAGE_SIZES(dev_priv, I915_GTT_PAGE_SIZE_64K) &&
2062             INTEL_GEN(dev_priv) <= 10)
2063                 I915_WRITE(GEN8_GAMW_ECO_DEV_RW_IA,
2064                            I915_READ(GEN8_GAMW_ECO_DEV_RW_IA) |
2065                            GAMW_ECO_ENABLE_64K_IPS_FIELD);
2066 }
2067
2068 int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv)
2069 {
2070         gtt_write_workarounds(dev_priv);
2071
2072         if (IS_GEN(dev_priv, 6))
2073                 gen6_ppgtt_enable(dev_priv);
2074         else if (IS_GEN(dev_priv, 7))
2075                 gen7_ppgtt_enable(dev_priv);
2076
2077         return 0;
2078 }
2079
2080 static struct i915_hw_ppgtt *
2081 __hw_ppgtt_create(struct drm_i915_private *i915)
2082 {
2083         if (INTEL_GEN(i915) < 8)
2084                 return gen6_ppgtt_create(i915);
2085         else
2086                 return gen8_ppgtt_create(i915);
2087 }
2088
2089 struct i915_hw_ppgtt *
2090 i915_ppgtt_create(struct drm_i915_private *i915)
2091 {
2092         struct i915_hw_ppgtt *ppgtt;
2093
2094         ppgtt = __hw_ppgtt_create(i915);
2095         if (IS_ERR(ppgtt))
2096                 return ppgtt;
2097
2098         trace_i915_ppgtt_create(&ppgtt->vm);
2099
2100         return ppgtt;
2101 }
2102
2103 static void ppgtt_destroy_vma(struct i915_address_space *vm)
2104 {
2105         struct list_head *phases[] = {
2106                 &vm->bound_list,
2107                 &vm->unbound_list,
2108                 NULL,
2109         }, **phase;
2110
2111         vm->closed = true;
2112         for (phase = phases; *phase; phase++) {
2113                 struct i915_vma *vma, *vn;
2114
2115                 list_for_each_entry_safe(vma, vn, *phase, vm_link)
2116                         i915_vma_destroy(vma);
2117         }
2118 }
2119
2120 void i915_ppgtt_release(struct kref *kref)
2121 {
2122         struct i915_hw_ppgtt *ppgtt =
2123                 container_of(kref, struct i915_hw_ppgtt, ref);
2124
2125         trace_i915_ppgtt_release(&ppgtt->vm);
2126
2127         ppgtt_destroy_vma(&ppgtt->vm);
2128
2129         GEM_BUG_ON(!list_empty(&ppgtt->vm.bound_list));
2130         GEM_BUG_ON(!list_empty(&ppgtt->vm.unbound_list));
2131
2132         ppgtt->vm.cleanup(&ppgtt->vm);
2133         i915_address_space_fini(&ppgtt->vm);
2134         kfree(ppgtt);
2135 }
2136
2137 /* Certain Gen5 chipsets require idling the GPU before
2138  * unmapping anything from the GTT when VT-d is enabled.
2139  */
2140 static bool needs_idle_maps(struct drm_i915_private *dev_priv)
2141 {
2142         /* Query intel_iommu to see if we need the workaround. Presumably that
2143          * was loaded first.
2144          */
2145         return IS_GEN(dev_priv, 5) && IS_MOBILE(dev_priv) && intel_vtd_active();
2146 }
2147
2148 static void gen6_check_faults(struct drm_i915_private *dev_priv)
2149 {
2150         struct intel_engine_cs *engine;
2151         enum intel_engine_id id;
2152         u32 fault;
2153
2154         for_each_engine(engine, dev_priv, id) {
2155                 fault = I915_READ(RING_FAULT_REG(engine));
2156                 if (fault & RING_FAULT_VALID) {
2157                         DRM_DEBUG_DRIVER("Unexpected fault\n"
2158                                          "\tAddr: 0x%08lx\n"
2159                                          "\tAddress space: %s\n"
2160                                          "\tSource ID: %d\n"
2161                                          "\tType: %d\n",
2162                                          fault & PAGE_MASK,
2163                                          fault & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
2164                                          RING_FAULT_SRCID(fault),
2165                                          RING_FAULT_FAULT_TYPE(fault));
2166                 }
2167         }
2168 }
2169
2170 static void gen8_check_faults(struct drm_i915_private *dev_priv)
2171 {
2172         u32 fault = I915_READ(GEN8_RING_FAULT_REG);
2173
2174         if (fault & RING_FAULT_VALID) {
2175                 u32 fault_data0, fault_data1;
2176                 u64 fault_addr;
2177
2178                 fault_data0 = I915_READ(GEN8_FAULT_TLB_DATA0);
2179                 fault_data1 = I915_READ(GEN8_FAULT_TLB_DATA1);
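                /*
                 * Reconstruct the faulting VA: DATA0 carries the page-aligned
                 * low bits (hence the << 12), while the masked DATA1 bits are
                 * shifted up to provide the bits from bit 44 upwards.
                 */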
2180                 fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
2181                              ((u64)fault_data0 << 12);
2182
2183                 DRM_DEBUG_DRIVER("Unexpected fault\n"
2184                                  "\tAddr: 0x%08x_%08x\n"
2185                                  "\tAddress space: %s\n"
2186                                  "\tEngine ID: %d\n"
2187                                  "\tSource ID: %d\n"
2188                                  "\tType: %d\n",
2189                                  upper_32_bits(fault_addr),
2190                                  lower_32_bits(fault_addr),
2191                                  fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
2192                                  GEN8_RING_FAULT_ENGINE_ID(fault),
2193                                  RING_FAULT_SRCID(fault),
2194                                  RING_FAULT_FAULT_TYPE(fault));
2195         }
2196 }
2197
2198 void i915_check_and_clear_faults(struct drm_i915_private *dev_priv)
2199 {
2200         /* From GEN8 onwards we only have one 'All Engine Fault Register' */
2201         if (INTEL_GEN(dev_priv) >= 8)
2202                 gen8_check_faults(dev_priv);
2203         else if (INTEL_GEN(dev_priv) >= 6)
2204                 gen6_check_faults(dev_priv);
2205         else
2206                 return;
2207
2208         i915_clear_error_registers(dev_priv);
2209 }
2210
2211 void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv)
2212 {
2213         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2214
2215         /* Don't bother messing with faults pre GEN6 as we have little
2216          * documentation supporting that it's a good idea.
2217          */
2218         if (INTEL_GEN(dev_priv) < 6)
2219                 return;
2220
2221         i915_check_and_clear_faults(dev_priv);
2222
2223         ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
2224
2225         i915_ggtt_invalidate(dev_priv);
2226 }
2227
2228 int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
2229                                struct sg_table *pages)
2230 {
2231         do {
2232                 if (dma_map_sg_attrs(&obj->base.dev->pdev->dev,
2233                                      pages->sgl, pages->nents,
2234                                      PCI_DMA_BIDIRECTIONAL,
2235                                      DMA_ATTR_NO_WARN))
2236                         return 0;
2237
2238                 /*
2239                  * If the DMA remap fails, one cause can be that we have
2240                  * too many objects pinned in a small remapping table,
2241                  * such as swiotlb. Incrementally purge all other objects and
2242                  * try again - if there are no more pages to remove from
2243                  * the DMA remapper, i915_gem_shrink will return 0.
2244                  */
2245                 GEM_BUG_ON(obj->mm.pages == pages);
2246         } while (i915_gem_shrink(to_i915(obj->base.dev),
2247                                  obj->base.size >> PAGE_SHIFT, NULL,
2248                                  I915_SHRINK_BOUND |
2249                                  I915_SHRINK_UNBOUND));
2250
2251         return -ENOSPC;
2252 }
2253
2254 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
2255 {
2256         writeq(pte, addr);
2257 }
2258
2259 static void gen8_ggtt_insert_page(struct i915_address_space *vm,
2260                                   dma_addr_t addr,
2261                                   u64 offset,
2262                                   enum i915_cache_level level,
2263                                   u32 unused)
2264 {
2265         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2266         gen8_pte_t __iomem *pte =
2267                 (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
2268
2269         gen8_set_pte(pte, gen8_pte_encode(addr, level, 0));
2270
2271         ggtt->invalidate(vm->i915);
2272 }
2273
2274 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
2275                                      struct i915_vma *vma,
2276                                      enum i915_cache_level level,
2277                                      u32 flags)
2278 {
2279         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2280         struct sgt_iter sgt_iter;
2281         gen8_pte_t __iomem *gtt_entries;
2282         const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0);
2283         dma_addr_t addr;
2284
2285         /*
2286          * Note that we ignore PTE_READ_ONLY here. The caller must be careful
2287          * not to allow the user to override access to a read-only page.
2288          */
2289
2290         gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
2291         gtt_entries += vma->node.start / I915_GTT_PAGE_SIZE;
2292         for_each_sgt_dma(addr, sgt_iter, vma->pages)
2293                 gen8_set_pte(gtt_entries++, pte_encode | addr);
2294
2295         /*
2296          * We want to flush the TLBs only after we're certain all the PTE
2297          * updates have finished.
2298          */
2299         ggtt->invalidate(vm->i915);
2300 }
2301
2302 static void gen6_ggtt_insert_page(struct i915_address_space *vm,
2303                                   dma_addr_t addr,
2304                                   u64 offset,
2305                                   enum i915_cache_level level,
2306                                   u32 flags)
2307 {
2308         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2309         gen6_pte_t __iomem *pte =
2310                 (gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
2311
2312         iowrite32(vm->pte_encode(addr, level, flags), pte);
2313
2314         ggtt->invalidate(vm->i915);
2315 }
2316
2317 /*
2318  * Binds an object into the global gtt with the specified cache level. The object
2319  * will be accessible to the GPU via commands whose operands reference offsets
2320  * within the global GTT, as well as being accessible by the CPU through
2321  * the GMADR-mapped BAR (dev_priv->mm.gtt->gtt).
2322  */
2323 static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
2324                                      struct i915_vma *vma,
2325                                      enum i915_cache_level level,
2326                                      u32 flags)
2327 {
2328         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2329         gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm;
2330         unsigned int i = vma->node.start / I915_GTT_PAGE_SIZE;
2331         struct sgt_iter iter;
2332         dma_addr_t addr;

2333         for_each_sgt_dma(addr, iter, vma->pages)
2334                 iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]);
2335
2336         /*
2337          * We want to flush the TLBs only after we're certain all the PTE
2338          * updates have finished.
2339          */
2340         ggtt->invalidate(vm->i915);
2341 }
2342
2343 static void nop_clear_range(struct i915_address_space *vm,
2344                             u64 start, u64 length)
2345 {
2346 }
2347
2348 static void gen8_ggtt_clear_range(struct i915_address_space *vm,
2349                                   u64 start, u64 length)
2350 {
2351         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2352         unsigned first_entry = start / I915_GTT_PAGE_SIZE;
2353         unsigned num_entries = length / I915_GTT_PAGE_SIZE;
2354         const gen8_pte_t scratch_pte = vm->scratch_pte;
2355         gen8_pte_t __iomem *gtt_base =
2356                 (gen8_pte_t __iomem *)ggtt->gsm + first_entry;
2357         const int max_entries = ggtt_total_entries(ggtt) - first_entry;
2358         int i;
2359
2360         if (WARN(num_entries > max_entries,
2361                  "First entry = %d; Num entries = %d (max=%d)\n",
2362                  first_entry, num_entries, max_entries))
2363                 num_entries = max_entries;
2364
2365         for (i = 0; i < num_entries; i++)
2366                 gen8_set_pte(&gtt_base[i], scratch_pte);
2367 }
2368
2369 static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
2370 {
2371         struct drm_i915_private *dev_priv = vm->i915;
2372
2373         /*
2374          * Make sure the internal GAM fifo has been cleared of all GTT
2375          * writes before exiting stop_machine(). This guarantees that
2376          * any aperture accesses waiting to start in another process
2377          * cannot back up behind the GTT writes causing a hang.
2378          * The register can be any arbitrary GAM register.
2379          */
2380         POSTING_READ(GFX_FLSH_CNTL_GEN6);
2381 }
2382
2383 struct insert_page {
2384         struct i915_address_space *vm;
2385         dma_addr_t addr;
2386         u64 offset;
2387         enum i915_cache_level level;
2388 };
2389
2390 static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
2391 {
2392         struct insert_page *arg = _arg;
2393
2394         gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
2395         bxt_vtd_ggtt_wa(arg->vm);
2396
2397         return 0;
2398 }
2399
2400 static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
2401                                           dma_addr_t addr,
2402                                           u64 offset,
2403                                           enum i915_cache_level level,
2404                                           u32 unused)
2405 {
2406         struct insert_page arg = { vm, addr, offset, level };
2407
2408         stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
2409 }
2410
2411 struct insert_entries {
2412         struct i915_address_space *vm;
2413         struct i915_vma *vma;
2414         enum i915_cache_level level;
2415         u32 flags;
2416 };
2417
2418 static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
2419 {
2420         struct insert_entries *arg = _arg;
2421
2422         gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags);
2423         bxt_vtd_ggtt_wa(arg->vm);
2424
2425         return 0;
2426 }
2427
2428 static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
2429                                              struct i915_vma *vma,
2430                                              enum i915_cache_level level,
2431                                              u32 flags)
2432 {
2433         struct insert_entries arg = { vm, vma, level, flags };
2434
2435         stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
2436 }
2437
2438 struct clear_range {
2439         struct i915_address_space *vm;
2440         u64 start;
2441         u64 length;
2442 };
2443
2444 static int bxt_vtd_ggtt_clear_range__cb(void *_arg)
2445 {
2446         struct clear_range *arg = _arg;
2447
2448         gen8_ggtt_clear_range(arg->vm, arg->start, arg->length);
2449         bxt_vtd_ggtt_wa(arg->vm);
2450
2451         return 0;
2452 }
2453
2454 static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm,
2455                                           u64 start,
2456                                           u64 length)
2457 {
2458         struct clear_range arg = { vm, start, length };
2459
2460         stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL);
2461 }
2462
2463 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
2464                                   u64 start, u64 length)
2465 {
2466         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2467         unsigned first_entry = start / I915_GTT_PAGE_SIZE;
2468         unsigned num_entries = length / I915_GTT_PAGE_SIZE;
2469         gen6_pte_t scratch_pte, __iomem *gtt_base =
2470                 (gen6_pte_t __iomem *)ggtt->gsm + first_entry;
2471         const int max_entries = ggtt_total_entries(ggtt) - first_entry;
2472         int i;
2473
2474         if (WARN(num_entries > max_entries,
2475                  "First entry = %d; Num entries = %d (max=%d)\n",
2476                  first_entry, num_entries, max_entries))
2477                 num_entries = max_entries;
2478
2479         scratch_pte = vm->scratch_pte;
2480
2481         for (i = 0; i < num_entries; i++)
2482                 iowrite32(scratch_pte, &gtt_base[i]);
2483 }
2484
2485 static void i915_ggtt_insert_page(struct i915_address_space *vm,
2486                                   dma_addr_t addr,
2487                                   u64 offset,
2488                                   enum i915_cache_level cache_level,
2489                                   u32 unused)
2490 {
2491         unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2492                 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2493
2494         intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
2495 }
2496
2497 static void i915_ggtt_insert_entries(struct i915_address_space *vm,
2498                                      struct i915_vma *vma,
2499                                      enum i915_cache_level cache_level,
2500                                      u32 unused)
2501 {
2502         unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2503                 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2504
2505         intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT,
2506                                     flags);
2507 }
2508
2509 static void i915_ggtt_clear_range(struct i915_address_space *vm,
2510                                   u64 start, u64 length)
2511 {
2512         intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
2513 }
2514
2515 static int ggtt_bind_vma(struct i915_vma *vma,
2516                          enum i915_cache_level cache_level,
2517                          u32 flags)
2518 {
2519         struct drm_i915_private *i915 = vma->vm->i915;
2520         struct drm_i915_gem_object *obj = vma->obj;
2521         intel_wakeref_t wakeref;
2522         u32 pte_flags;
2523
2524         /* Applicable to VLV (gen8+ do not support RO in the GGTT) */
2525         pte_flags = 0;
2526         if (i915_gem_object_is_readonly(obj))
2527                 pte_flags |= PTE_READ_ONLY;
2528
2529         with_intel_runtime_pm(i915, wakeref)
2530                 vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
2531
2532         vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
2533
2534         /*
2535          * Without aliasing PPGTT there's no difference between
2536          * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
2537          * upgrade to both bound if we bind either to avoid double-binding.
2538          */
2539         vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
2540
2541         return 0;
2542 }
2543
2544 static void ggtt_unbind_vma(struct i915_vma *vma)
2545 {
2546         struct drm_i915_private *i915 = vma->vm->i915;
2547         intel_wakeref_t wakeref;
2548
2549         with_intel_runtime_pm(i915, wakeref)
2550                 vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
2551 }
2552
2553 static int aliasing_gtt_bind_vma(struct i915_vma *vma,
2554                                  enum i915_cache_level cache_level,
2555                                  u32 flags)
2556 {
2557         struct drm_i915_private *i915 = vma->vm->i915;
2558         u32 pte_flags;
2559         int ret;
2560
2561         /* Currently applicable only to VLV */
2562         pte_flags = 0;
2563         if (i915_gem_object_is_readonly(vma->obj))
2564                 pte_flags |= PTE_READ_ONLY;
2565
2566         if (flags & I915_VMA_LOCAL_BIND) {
2567                 struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt;
2568
2569                 if (!(vma->flags & I915_VMA_LOCAL_BIND)) {
2570                         ret = appgtt->vm.allocate_va_range(&appgtt->vm,
2571                                                            vma->node.start,
2572                                                            vma->size);
2573                         if (ret)
2574                                 return ret;
2575                 }
2576
2577                 appgtt->vm.insert_entries(&appgtt->vm, vma, cache_level,
2578                                           pte_flags);
2579         }
2580
2581         if (flags & I915_VMA_GLOBAL_BIND) {
2582                 intel_wakeref_t wakeref;
2583
2584                 with_intel_runtime_pm(i915, wakeref) {
2585                         vma->vm->insert_entries(vma->vm, vma,
2586                                                 cache_level, pte_flags);
2587                 }
2588         }
2589
2590         return 0;
2591 }
2592
2593 static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
2594 {
2595         struct drm_i915_private *i915 = vma->vm->i915;
2596
2597         if (vma->flags & I915_VMA_GLOBAL_BIND) {
2598                 struct i915_address_space *vm = vma->vm;
2599                 intel_wakeref_t wakeref;
2600
2601                 with_intel_runtime_pm(i915, wakeref)
2602                         vm->clear_range(vm, vma->node.start, vma->size);
2603         }
2604
2605         if (vma->flags & I915_VMA_LOCAL_BIND) {
2606                 struct i915_address_space *vm = &i915->mm.aliasing_ppgtt->vm;
2607
2608                 vm->clear_range(vm, vma->node.start, vma->size);
2609         }
2610 }
2611
2612 void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
2613                                struct sg_table *pages)
2614 {
2615         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2616         struct device *kdev = &dev_priv->drm.pdev->dev;
2617         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2618
2619         if (unlikely(ggtt->do_idle_maps)) {
2620                 if (i915_gem_wait_for_idle(dev_priv, 0, MAX_SCHEDULE_TIMEOUT)) {
2621                         DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
2622                         /* Wait a bit, in hopes it avoids the hang */
2623                         udelay(10);
2624                 }
2625         }
2626
2627         dma_unmap_sg(kdev, pages->sgl, pages->nents, PCI_DMA_BIDIRECTIONAL);
2628 }
2629
2630 static int ggtt_set_pages(struct i915_vma *vma)
2631 {
2632         int ret;
2633
2634         GEM_BUG_ON(vma->pages);
2635
2636         ret = i915_get_ggtt_vma_pages(vma);
2637         if (ret)
2638                 return ret;
2639
2640         vma->page_sizes = vma->obj->mm.page_sizes;
2641
2642         return 0;
2643 }
2644
2645 static void i915_gtt_color_adjust(const struct drm_mm_node *node,
2646                                   unsigned long color,
2647                                   u64 *start,
2648                                   u64 *end)
2649 {
2650         if (node->allocated && node->color != color)
2651                 *start += I915_GTT_PAGE_SIZE;
2652
2653         /* Also leave a space between the unallocated reserved node after the
2654          * GTT and any objects within the GTT, i.e. we use the color adjustment
2655          * to insert a guard page to prevent prefetches crossing over the
2656          * GTT boundary.
2657          */
2658         node = list_next_entry(node, node_list);
2659         if (node->color != color)
2660                 *end -= I915_GTT_PAGE_SIZE;
2661 }
2662
2663 int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915)
2664 {
2665         struct i915_ggtt *ggtt = &i915->ggtt;
2666         struct i915_hw_ppgtt *ppgtt;
2667         int err;
2668
2669         ppgtt = i915_ppgtt_create(i915);
2670         if (IS_ERR(ppgtt))
2671                 return PTR_ERR(ppgtt);
2672
2673         if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) {
2674                 err = -ENODEV;
2675                 goto err_ppgtt;
2676         }
2677
2678         /*
2679          * Note we only pre-allocate as far as the end of the global
2680          * GTT. On 48b / 4-level page-tables, the difference is very,
2681          * very significant! We have to preallocate as GVT/vgpu does
2682          * not like the page directory disappearing.
2683          */
2684         err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, ggtt->vm.total);
2685         if (err)
2686                 goto err_ppgtt;
2687
2688         i915->mm.aliasing_ppgtt = ppgtt;
2689
2690         GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
2691         ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
2692
2693         GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma);
2694         ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;
2695
2696         return 0;
2697
2698 err_ppgtt:
2699         i915_ppgtt_put(ppgtt);
2700         return err;
2701 }
2702
2703 void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915)
2704 {
2705         struct i915_ggtt *ggtt = &i915->ggtt;
2706         struct i915_hw_ppgtt *ppgtt;
2707
2708         ppgtt = fetch_and_zero(&i915->mm.aliasing_ppgtt);
2709         if (!ppgtt)
2710                 return;
2711
2712         i915_ppgtt_put(ppgtt);
2713
2714         ggtt->vm.vma_ops.bind_vma   = ggtt_bind_vma;
2715         ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
2716 }
2717
2718 int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
2719 {
2720         /* Let GEM manage all of the aperture.
2721          *
2722          * However, leave one page at the end still bound to the scratch page.
2723          * There are a number of places where the hardware apparently prefetches
2724          * past the end of the object, and we've seen multiple hangs with the
2725          * GPU head pointer stuck in a batchbuffer bound at the last page of the
2726          * aperture.  One page should be enough to keep any prefetching inside
2727          * of the aperture.
2728          */
2729         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2730         unsigned long hole_start, hole_end;
2731         struct drm_mm_node *entry;
2732         int ret;
2733
2734         /*
2735          * GuC requires all resources that we're sharing with it to be placed in
2736          * non-WOPCM memory. If GuC is not present or not in use we still need a
2737          * small bias as ring wraparound at offset 0 sometimes hangs. No idea
2738          * why.
2739          */
2740         ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE,
2741                                intel_guc_reserved_gtt_size(&dev_priv->guc));
2742
2743         ret = intel_vgt_balloon(dev_priv);
2744         if (ret)
2745                 return ret;
2746
2747         /* Reserve a mappable slot for our lockless error capture */
2748         ret = drm_mm_insert_node_in_range(&ggtt->vm.mm, &ggtt->error_capture,
2749                                           PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
2750                                           0, ggtt->mappable_end,
2751                                           DRM_MM_INSERT_LOW);
2752         if (ret)
2753                 return ret;
2754
2755         /* Clear any non-preallocated blocks */
2756         drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) {
2757                 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
2758                               hole_start, hole_end);
2759                 ggtt->vm.clear_range(&ggtt->vm, hole_start,
2760                                      hole_end - hole_start);
2761         }
2762
2763         /* And finally clear the reserved guard page */
2764         ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE);
2765
2766         if (INTEL_PPGTT(dev_priv) == INTEL_PPGTT_ALIASING) {
2767                 ret = i915_gem_init_aliasing_ppgtt(dev_priv);
2768                 if (ret)
2769                         goto err;
2770         }
2771
2772         return 0;
2773
2774 err:
2775         drm_mm_remove_node(&ggtt->error_capture);
2776         return ret;
2777 }
2778
2779 /**
2780  * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization
2781  * @dev_priv: i915 device
2782  */
2783 void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
2784 {
2785         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2786         struct i915_vma *vma, *vn;
2787         struct pagevec *pvec;
2788
2789         ggtt->vm.closed = true;
2790
2791         mutex_lock(&dev_priv->drm.struct_mutex);
2792         i915_gem_fini_aliasing_ppgtt(dev_priv);
2793
2794         list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link)
2795                 WARN_ON(i915_vma_unbind(vma));
2796
2797         if (drm_mm_node_allocated(&ggtt->error_capture))
2798                 drm_mm_remove_node(&ggtt->error_capture);
2799
2800         if (drm_mm_initialized(&ggtt->vm.mm)) {
2801                 intel_vgt_deballoon(dev_priv);
2802                 i915_address_space_fini(&ggtt->vm);
2803         }
2804
2805         ggtt->vm.cleanup(&ggtt->vm);
2806
2807         pvec = &dev_priv->mm.wc_stash.pvec;
2808         if (pvec->nr) {
2809                 set_pages_array_wb(pvec->pages, pvec->nr);
2810                 __pagevec_release(pvec);
2811         }
2812
2813         mutex_unlock(&dev_priv->drm.struct_mutex);
2814
2815         arch_phys_wc_del(ggtt->mtrr);
2816         io_mapping_fini(&ggtt->iomap);
2817
2818         i915_gem_cleanup_stolen(dev_priv);
2819 }
2820
2821 static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
2822 {
2823         snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
2824         snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
2825         return snb_gmch_ctl << 20;
2826 }
2827
2828 static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
2829 {
2830         bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
2831         bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
2832         if (bdw_gmch_ctl)
2833                 bdw_gmch_ctl = 1 << bdw_gmch_ctl;
2834
2835 #ifdef CONFIG_X86_32
2836         /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */
2837         if (bdw_gmch_ctl > 4)
2838                 bdw_gmch_ctl = 4;
2839 #endif
2840
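        /*
         * e.g. a GGMS field of 3 decodes to 1 << 3 = 8, i.e. an 8 MiB GTT;
         * at 8 bytes per gen8 PTE that is 1M entries, or 4 GiB of GGTT
         * address space.
         */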
2841         return bdw_gmch_ctl << 20;
2842 }
2843
2844 static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
2845 {
2846         gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
2847         gmch_ctrl &= SNB_GMCH_GGMS_MASK;
2848
2849         if (gmch_ctrl)
2850                 return 1 << (20 + gmch_ctrl);
2851
2852         return 0;
2853 }
2854
2855 static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
2856 {
2857         struct drm_i915_private *dev_priv = ggtt->vm.i915;
2858         struct pci_dev *pdev = dev_priv->drm.pdev;
2859         phys_addr_t phys_addr;
2860         int ret;
2861
2862         /* For modern GENs, the PTEs and register space are split in the BAR */
2863         phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;
2864
2865         /*
2866          * On BXT+/CNL+ writes larger than 64 bit to the GTT pagetable range
2867          * will be dropped. For WC mappings in general we have 64 byte burst
2868          * writes when the WC buffer is flushed, so we can't use it, but have to
2869          * resort to an uncached mapping. The WC issue is easily caught by the
2870          * readback check when writing GTT PTE entries.
2871          */
2872         if (IS_GEN9_LP(dev_priv) || INTEL_GEN(dev_priv) >= 10)
2873                 ggtt->gsm = ioremap_nocache(phys_addr, size);
2874         else
2875                 ggtt->gsm = ioremap_wc(phys_addr, size);
2876         if (!ggtt->gsm) {
2877                 DRM_ERROR("Failed to map the ggtt page table\n");
2878                 return -ENOMEM;
2879         }
2880
2881         ret = setup_scratch_page(&ggtt->vm, GFP_DMA32);
2882         if (ret) {
2883                 DRM_ERROR("Scratch setup failed\n");
2884                 /* iounmap will also get called at remove, but meh */
2885                 iounmap(ggtt->gsm);
2886                 return ret;
2887         }
2888
2889         ggtt->vm.scratch_pte =
2890                 ggtt->vm.pte_encode(ggtt->vm.scratch_page.daddr,
2891                                     I915_CACHE_NONE, 0);
2892
2893         return 0;
2894 }
2895
2896 static struct intel_ppat_entry *
2897 __alloc_ppat_entry(struct intel_ppat *ppat, unsigned int index, u8 value)
2898 {
2899         struct intel_ppat_entry *entry = &ppat->entries[index];
2900
2901         GEM_BUG_ON(index >= ppat->max_entries);
2902         GEM_BUG_ON(test_bit(index, ppat->used));
2903
2904         entry->ppat = ppat;
2905         entry->value = value;
2906         kref_init(&entry->ref);
2907         set_bit(index, ppat->used);
2908         set_bit(index, ppat->dirty);
2909
2910         return entry;
2911 }
2912
2913 static void __free_ppat_entry(struct intel_ppat_entry *entry)
2914 {
2915         struct intel_ppat *ppat = entry->ppat;
2916         unsigned int index = entry - ppat->entries;
2917
2918         GEM_BUG_ON(index >= ppat->max_entries);
2919         GEM_BUG_ON(!test_bit(index, ppat->used));
2920
2921         entry->value = ppat->clear_value;
2922         clear_bit(index, ppat->used);
2923         set_bit(index, ppat->dirty);
2924 }
2925
2926 /**
2927  * intel_ppat_get - get a usable PPAT entry
2928  * @i915: i915 device instance
2929  * @value: the PPAT value required by the caller
2930  *
2931  * The function searches for an existing PPAT entry that matches the
2932  * required value. If a perfect match is found, that entry is reused. If the
2933  * match is only partial, it checks for a free PPAT index: if one is
2934  * available, a new PPAT entry is allocated for the required value and the
2935  * HW is updated; if not, the partially matching entry is used.
2937  */
2938 const struct intel_ppat_entry *
2939 intel_ppat_get(struct drm_i915_private *i915, u8 value)
2940 {
2941         struct intel_ppat *ppat = &i915->ppat;
2942         struct intel_ppat_entry *entry = NULL;
2943         unsigned int scanned, best_score;
2944         int i;
2945
2946         GEM_BUG_ON(!ppat->max_entries);
2947
2948         scanned = best_score = 0;
2949         for_each_set_bit(i, ppat->used, ppat->max_entries) {
2950                 unsigned int score;
2951
2952                 score = ppat->match(ppat->entries[i].value, value);
2953                 if (score > best_score) {
2954                         entry = &ppat->entries[i];
2955                         if (score == INTEL_PPAT_PERFECT_MATCH) {
2956                                 kref_get(&entry->ref);
2957                                 return entry;
2958                         }
2959                         best_score = score;
2960                 }
2961                 scanned++;
2962         }
2963
2964         if (scanned == ppat->max_entries) {
2965                 if (!entry)
2966                         return ERR_PTR(-ENOSPC);
2967
2968                 kref_get(&entry->ref);
2969                 return entry;
2970         }
2971
2972         i = find_first_zero_bit(ppat->used, ppat->max_entries);
2973         entry = __alloc_ppat_entry(ppat, i, value);
2974         ppat->update_hw(i915);
2975         return entry;
2976 }
2977
2978 static void release_ppat(struct kref *kref)
2979 {
2980         struct intel_ppat_entry *entry =
2981                 container_of(kref, struct intel_ppat_entry, ref);
2982         struct drm_i915_private *i915 = entry->ppat->i915;
2983
2984         __free_ppat_entry(entry);
2985         entry->ppat->update_hw(i915);
2986 }
2987
2988 /**
2989  * intel_ppat_put - put back the PPAT entry got from intel_ppat_get()
2990  * @entry: an intel PPAT entry
2991  *
2992  * Put back a PPAT entry obtained from intel_ppat_get(). If the PPAT index of
2993  * the entry was dynamically allocated, its reference count is decreased. Once
2994  * the reference count drops to zero, the PPAT index becomes free again.
2995  */
2996 void intel_ppat_put(const struct intel_ppat_entry *entry)
2997 {
2998         struct intel_ppat *ppat = entry->ppat;
2999         unsigned int index = entry - ppat->entries;
3000
3001         GEM_BUG_ON(!ppat->max_entries);
3002
3003         kref_put(&ppat->entries[index].ref, release_ppat);
3004 }
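/*
 * A minimal usage sketch of the get/put pairing (example_ppat_usage() is a
 * hypothetical caller, kept under #if 0 so it is not built): request a
 * cacheability value, use the entry, then drop the reference.
 */
#if 0
static void example_ppat_usage(struct drm_i915_private *i915)
{
        const struct intel_ppat_entry *entry;

        entry = intel_ppat_get(i915, GEN8_PPAT_WB | GEN8_PPAT_LLC);
        if (IS_ERR(entry))
                return;

        /* entry->value is the PPAT value actually programmed in hardware */

        intel_ppat_put(entry);
}
#endif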
3005
3006 static void cnl_private_pat_update_hw(struct drm_i915_private *dev_priv)
3007 {
3008         struct intel_ppat *ppat = &dev_priv->ppat;
3009         int i;
3010
3011         for_each_set_bit(i, ppat->dirty, ppat->max_entries) {
3012                 I915_WRITE(GEN10_PAT_INDEX(i), ppat->entries[i].value);
3013                 clear_bit(i, ppat->dirty);
3014         }
3015 }
3016
3017 static void bdw_private_pat_update_hw(struct drm_i915_private *dev_priv)
3018 {
3019         struct intel_ppat *ppat = &dev_priv->ppat;
3020         u64 pat = 0;
3021         int i;
3022
3023         for (i = 0; i < ppat->max_entries; i++)
3024                 pat |= GEN8_PPAT(i, ppat->entries[i].value);
3025
3026         bitmap_clear(ppat->dirty, 0, ppat->max_entries);
3027
3028         I915_WRITE(GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
3029         I915_WRITE(GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
3030 }
3031
3032 static unsigned int bdw_private_pat_match(u8 src, u8 dst)
3033 {
3034         unsigned int score = 0;
3035         enum {
3036                 AGE_MATCH = BIT(0),
3037                 TC_MATCH = BIT(1),
3038                 CA_MATCH = BIT(2),
3039         };
3040
3041         /* Cache attribute has to be matched. */
3042         if (GEN8_PPAT_GET_CA(src) != GEN8_PPAT_GET_CA(dst))
3043                 return 0;
3044
3045         score |= CA_MATCH;
3046
3047         if (GEN8_PPAT_GET_TC(src) == GEN8_PPAT_GET_TC(dst))
3048                 score |= TC_MATCH;
3049
3050         if (GEN8_PPAT_GET_AGE(src) == GEN8_PPAT_GET_AGE(dst))
3051                 score |= AGE_MATCH;
3052
3053         if (score == (AGE_MATCH | TC_MATCH | CA_MATCH))
3054                 return INTEL_PPAT_PERFECT_MATCH;
3055
3056         return score;
3057 }
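
/*
 * For example (illustrative values only): matching a requested
 * GEN8_PPAT_WB | GEN8_PPAT_LLC | GEN8_PPAT_AGE(0) against an existing
 * GEN8_PPAT_WB | GEN8_PPAT_LLC | GEN8_PPAT_AGE(3) entry scores
 * CA_MATCH | TC_MATCH (cache attribute and target cache agree, age does
 * not), while matching a WB request against a WC entry scores 0, since
 * the cache attribute is mandatory.
 */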
3058
3059 static unsigned int chv_private_pat_match(u8 src, u8 dst)
3060 {
3061         return (CHV_PPAT_GET_SNOOP(src) == CHV_PPAT_GET_SNOOP(dst)) ?
3062                 INTEL_PPAT_PERFECT_MATCH : 0;
3063 }
3064
3065 static void cnl_setup_private_ppat(struct intel_ppat *ppat)
3066 {
3067         ppat->max_entries = 8;
3068         ppat->update_hw = cnl_private_pat_update_hw;
3069         ppat->match = bdw_private_pat_match;
3070         ppat->clear_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3);
3071
3072         __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC);
3073         __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
3074         __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
3075         __alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC);
3076         __alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
3077         __alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
3078         __alloc_ppat_entry(ppat, 6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
3079         __alloc_ppat_entry(ppat, 7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
3080 }
3081
3082 /* The GGTT and PPGTT need a private PPAT setup to handle the cacheability
3083  * bits. When using advanced contexts, each context stores its own PAT, but
3084  * writing this data shouldn't be harmful even in those cases. */
3085 static void bdw_setup_private_ppat(struct intel_ppat *ppat)
3086 {
3087         ppat->max_entries = 8;
3088         ppat->update_hw = bdw_private_pat_update_hw;
3089         ppat->match = bdw_private_pat_match;
3090         ppat->clear_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3);
3091
3092         if (!HAS_PPGTT(ppat->i915)) {
3093                 /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
3094                  * so RTL will always use the value corresponding to
3095                  * pat_sel = 000".
3096                  * So let's disable caching for the GGTT to avoid screen corruption.
3097                  * MOCS can still be used, though.
3098                  * - System agent ggtt writes (i.e. cpu gtt mmaps) already worked
3099                  * before this patch, i.e. the same uncached + snooping access
3100                  * as on gen6/7 seems to be in effect.
3101                  * - So this just fixes blitter/render access. Again it looks
3102                  * like it's not just uncached access, but uncached + snooping.
3103                  * So we can still hold onto all our assumptions wrt cpu
3104                  * clflushing on LLC machines.
3105                  */
3106                 __alloc_ppat_entry(ppat, 0, GEN8_PPAT_UC);
3107                 return;
3108         }
3109
3110         __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC);      /* for normal objects, no eLLC */
3111         __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);  /* for something pointing to ptes? */
3112         __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);  /* for scanout with eLLC */
3113         __alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC);                      /* Uncached objects, mostly for scanout */
3114         __alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
3115         __alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
3116         __alloc_ppat_entry(ppat, 6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
3117         __alloc_ppat_entry(ppat, 7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
3118 }
3119
3120 static void chv_setup_private_ppat(struct intel_ppat *ppat)
3121 {
3122         ppat->max_entries = 8;
3123         ppat->update_hw = bdw_private_pat_update_hw;
3124         ppat->match = chv_private_pat_match;
3125         ppat->clear_value = CHV_PPAT_SNOOP;
3126
3127         /*
3128          * Map WB on BDW to snooped on CHV.
3129          *
3130          * Only the snoop bit has meaning for CHV, the rest is
3131          * ignored.
3132          *
3133          * The hardware will never snoop for certain types of accesses:
3134          * - CPU GTT (GMADR->GGTT->no snoop->memory)
3135          * - PPGTT page tables
3136          * - some other special cycles
3137          *
3138          * As with BDW, we also need to consider the following for GT accesses:
3139          * "For GGTT, there is NO pat_sel[2:0] from the entry,
3140          * so RTL will always use the value corresponding to
3141          * pat_sel = 000".
3142          * Which means we must set the snoop bit in PAT entry 0
3143          * in order to keep the global status page working.
3144          */
3145
3146         __alloc_ppat_entry(ppat, 0, CHV_PPAT_SNOOP);
3147         __alloc_ppat_entry(ppat, 1, 0);
3148         __alloc_ppat_entry(ppat, 2, 0);
3149         __alloc_ppat_entry(ppat, 3, 0);
3150         __alloc_ppat_entry(ppat, 4, CHV_PPAT_SNOOP);
3151         __alloc_ppat_entry(ppat, 5, CHV_PPAT_SNOOP);
3152         __alloc_ppat_entry(ppat, 6, CHV_PPAT_SNOOP);
3153         __alloc_ppat_entry(ppat, 7, CHV_PPAT_SNOOP);
3154 }
3155
3156 static void gen6_gmch_remove(struct i915_address_space *vm)
3157 {
3158         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
3159
3160         iounmap(ggtt->gsm);
3161         cleanup_scratch_page(vm);
3162 }
3163
3164 static void setup_private_pat(struct drm_i915_private *dev_priv)
3165 {
3166         struct intel_ppat *ppat = &dev_priv->ppat;
3167         int i;
3168
3169         ppat->i915 = dev_priv;
3170
3171         if (INTEL_GEN(dev_priv) >= 10)
3172                 cnl_setup_private_ppat(ppat);
3173         else if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv))
3174                 chv_setup_private_ppat(ppat);
3175         else
3176                 bdw_setup_private_ppat(ppat);
3177
3178         GEM_BUG_ON(ppat->max_entries > INTEL_MAX_PPAT_ENTRIES);
3179
3180         for_each_clear_bit(i, ppat->used, ppat->max_entries) {
3181                 ppat->entries[i].value = ppat->clear_value;
3182                 ppat->entries[i].ppat = ppat;
3183                 set_bit(i, ppat->dirty);
3184         }
3185
3186         ppat->update_hw(dev_priv);
3187 }
3188
3189 static int gen8_gmch_probe(struct i915_ggtt *ggtt)
3190 {
3191         struct drm_i915_private *dev_priv = ggtt->vm.i915;
3192         struct pci_dev *pdev = dev_priv->drm.pdev;
3193         unsigned int size;
3194         u16 snb_gmch_ctl;
3195         int err;
3196
3197         /* TODO: We're not aware of mappable constraints on gen8 yet */
3198         ggtt->gmadr =
3199                 (struct resource) DEFINE_RES_MEM(pci_resource_start(pdev, 2),
3200                                                  pci_resource_len(pdev, 2));
3201         ggtt->mappable_end = resource_size(&ggtt->gmadr);
3202
3203         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
3204         if (!err)
3205                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
3206         if (err)
3207                 DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
3208
3209         pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3210         if (IS_CHERRYVIEW(dev_priv))
3211                 size = chv_get_total_gtt_size(snb_gmch_ctl);
3212         else
3213                 size = gen8_get_total_gtt_size(snb_gmch_ctl);
3214
3215         ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
3216         ggtt->vm.cleanup = gen6_gmch_remove;
3217         ggtt->vm.insert_page = gen8_ggtt_insert_page;
3218         ggtt->vm.clear_range = nop_clear_range;
3219         if (intel_scanout_needs_vtd_wa(dev_priv))
3220                 ggtt->vm.clear_range = gen8_ggtt_clear_range;
3221
3222         ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
3223
3224         /* Serialize GTT updates with aperture access on BXT if VT-d is on. */
3225         if (intel_ggtt_update_needs_vtd_wa(dev_priv) ||
3226             IS_CHERRYVIEW(dev_priv) /* fails with concurrent use/update */) {
3227                 ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
3228                 ggtt->vm.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
3229                 if (ggtt->vm.clear_range != nop_clear_range)
3230                         ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL;
3231
3232                 /* Prevent recursive stop_machine() calls and deadlocks. */
3233                 dev_info(dev_priv->drm.dev,
3234                          "Disabling error capture for VT-d workaround\n");
3235                 i915_disable_error_state(dev_priv, -ENODEV);
3236         }
3237
3238         ggtt->invalidate = gen6_ggtt_invalidate;
3239
3240         ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
3241         ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
3242         ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
3243         ggtt->vm.vma_ops.clear_pages = clear_pages;
3244
3245         ggtt->vm.pte_encode = gen8_pte_encode;
3246
3247         setup_private_pat(dev_priv);
3248
3249         return ggtt_probe_common(ggtt, size);
3250 }
3251
3252 static int gen6_gmch_probe(struct i915_ggtt *ggtt)
3253 {
3254         struct drm_i915_private *dev_priv = ggtt->vm.i915;
3255         struct pci_dev *pdev = dev_priv->drm.pdev;
3256         unsigned int size;
3257         u16 snb_gmch_ctl;
3258         int err;
3259
3260         ggtt->gmadr =
3261                 (struct resource) DEFINE_RES_MEM(pci_resource_start(pdev, 2),
3262                                                  pci_resource_len(pdev, 2));
3263         ggtt->mappable_end = resource_size(&ggtt->gmadr);
3264
3265         /* 64/512MB is the current min/max we actually know of, but this is just
3266          * a coarse sanity check.
3267          */
3268         if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
3269                 DRM_ERROR("Unknown GMADR size (%pa)\n", &ggtt->mappable_end);
3270                 return -ENXIO;
3271         }
3272
3273         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
3274         if (!err)
3275                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
3276         if (err)
3277                 DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
3278         pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3279
3280         size = gen6_get_total_gtt_size(snb_gmch_ctl);
3281         ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;
3282
3283         ggtt->vm.clear_range = gen6_ggtt_clear_range;
3284         ggtt->vm.insert_page = gen6_ggtt_insert_page;
3285         ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
3286         ggtt->vm.cleanup = gen6_gmch_remove;
3287
3288         ggtt->invalidate = gen6_ggtt_invalidate;
3289
3290         if (HAS_EDRAM(dev_priv))
3291                 ggtt->vm.pte_encode = iris_pte_encode;
3292         else if (IS_HASWELL(dev_priv))
3293                 ggtt->vm.pte_encode = hsw_pte_encode;
3294         else if (IS_VALLEYVIEW(dev_priv))
3295                 ggtt->vm.pte_encode = byt_pte_encode;
3296         else if (INTEL_GEN(dev_priv) >= 7)
3297                 ggtt->vm.pte_encode = ivb_pte_encode;
3298         else
3299                 ggtt->vm.pte_encode = snb_pte_encode;
3300
3301         ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
3302         ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
3303         ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
3304         ggtt->vm.vma_ops.clear_pages = clear_pages;
3305
3306         return ggtt_probe_common(ggtt, size);
3307 }
3308
3309 static void i915_gmch_remove(struct i915_address_space *vm)
3310 {
3311         intel_gmch_remove();
3312 }
3313
3314 static int i915_gmch_probe(struct i915_ggtt *ggtt)
3315 {
3316         struct drm_i915_private *dev_priv = ggtt->vm.i915;
3317         phys_addr_t gmadr_base;
3318         int ret;
3319
3320         ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL);
3321         if (!ret) {
3322                 DRM_ERROR("failed to set up gmch\n");
3323                 return -EIO;
3324         }
3325
3326         intel_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end);
3327
3328         ggtt->gmadr =
3329                 (struct resource) DEFINE_RES_MEM(gmadr_base,
3330                                                  ggtt->mappable_end);
3331
3332         ggtt->do_idle_maps = needs_idle_maps(dev_priv);
3333         ggtt->vm.insert_page = i915_ggtt_insert_page;
3334         ggtt->vm.insert_entries = i915_ggtt_insert_entries;
3335         ggtt->vm.clear_range = i915_ggtt_clear_range;
3336         ggtt->vm.cleanup = i915_gmch_remove;
3337
3338         ggtt->invalidate = gmch_ggtt_invalidate;
3339
3340         ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
3341         ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
3342         ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
3343         ggtt->vm.vma_ops.clear_pages = clear_pages;
3344
3345         if (unlikely(ggtt->do_idle_maps))
3346                 DRM_INFO("applying Ironlake quirks for intel_iommu\n");
3347
3348         return 0;
3349 }
3350
3351 /**
3352  * i915_ggtt_probe_hw - Probe GGTT hardware location
3353  * @dev_priv: i915 device
3354  */
3355 int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv)
3356 {
3357         struct i915_ggtt *ggtt = &dev_priv->ggtt;
3358         int ret;
3359
3360         ggtt->vm.i915 = dev_priv;
3361         ggtt->vm.dma = &dev_priv->drm.pdev->dev;
3362
3363         if (INTEL_GEN(dev_priv) <= 5)
3364                 ret = i915_gmch_probe(ggtt);
3365         else if (INTEL_GEN(dev_priv) < 8)
3366                 ret = gen6_gmch_probe(ggtt);
3367         else
3368                 ret = gen8_gmch_probe(ggtt);
3369         if (ret)
3370                 return ret;
3371
3372         /* Trim the GGTT to fit the GuC mappable upper range (when enabled).
3373          * This is easier than doing range restriction on the fly, as we
3374          * currently don't have any bits spare to pass in this upper
3375          * restriction!
3376          */
3377         if (USES_GUC(dev_priv)) {
3378                 ggtt->vm.total = min_t(u64, ggtt->vm.total, GUC_GGTT_TOP);
3379                 ggtt->mappable_end =
3380                         min_t(u64, ggtt->mappable_end, ggtt->vm.total);
3381         }
3382
3383         if ((ggtt->vm.total - 1) >> 32) {
3384                 DRM_ERROR("We never expected a Global GTT with more than 32bits"
3385                           " of address space! Found %lldM!\n",
3386                           ggtt->vm.total >> 20);
3387                 ggtt->vm.total = 1ULL << 32;
3388                 ggtt->mappable_end =
3389                         min_t(u64, ggtt->mappable_end, ggtt->vm.total);
3390         }
3391
3392         if (ggtt->mappable_end > ggtt->vm.total) {
3393                 DRM_ERROR("mappable aperture extends past end of GGTT,"
3394                           " aperture=%pa, total=%llx\n",
3395                           &ggtt->mappable_end, ggtt->vm.total);
3396                 ggtt->mappable_end = ggtt->vm.total;
3397         }
3398
3399         /* GMADR is the PCI mmio aperture into the global GTT. */
3400         DRM_DEBUG_DRIVER("GGTT size = %lluM\n", ggtt->vm.total >> 20);
3401         DRM_DEBUG_DRIVER("GMADR size = %lluM\n", (u64)ggtt->mappable_end >> 20);
3402         DRM_DEBUG_DRIVER("DSM size = %lluM\n",
3403                          (u64)resource_size(&intel_graphics_stolen_res) >> 20);
3404         if (intel_vtd_active())
3405                 DRM_INFO("VT-d active for gfx access\n");
3406
3407         return 0;
3408 }
3409
3410 /**
3411  * i915_ggtt_init_hw - Initialize GGTT hardware
3412  * @dev_priv: i915 device
3413  */
3414 int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
3415 {
3416         struct i915_ggtt *ggtt = &dev_priv->ggtt;
3417         int ret;
3418
3419         stash_init(&dev_priv->mm.wc_stash);
3420
3421         /* Note that we use page colouring to enforce a guard page at the
3422          * end of the address space. This is required as the CS may prefetch
3423          * beyond the end of the batch buffer, across the page boundary,
3424          * and beyond the end of the GTT if we do not provide a guard.
3425          */
3426         mutex_lock(&dev_priv->drm.struct_mutex);
3427         i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);
3428
3429         ggtt->vm.is_ggtt = true;
3430
3431         /* Only VLV supports read-only GGTT mappings */
3432         ggtt->vm.has_read_only = IS_VALLEYVIEW(dev_priv);
3433
3434         if (!HAS_LLC(dev_priv) && !HAS_PPGTT(dev_priv))
3435                 ggtt->vm.mm.color_adjust = i915_gtt_color_adjust;
3436         mutex_unlock(&dev_priv->drm.struct_mutex);
3437
3438         if (!io_mapping_init_wc(&dev_priv->ggtt.iomap,
3439                                 dev_priv->ggtt.gmadr.start,
3440                                 dev_priv->ggtt.mappable_end)) {
3441                 ret = -EIO;
3442                 goto out_gtt_cleanup;
3443         }
3444
3445         ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start, ggtt->mappable_end);
3446
3447         /*
3448          * Initialise stolen early so that we may reserve preallocated
3449          * objects for the BIOS to KMS transition.
3450          */
3451         ret = i915_gem_init_stolen(dev_priv);
3452         if (ret)
3453                 goto out_gtt_cleanup;
3454
3455         return 0;
3456
3457 out_gtt_cleanup:
3458         ggtt->vm.cleanup(&ggtt->vm);
3459         return ret;
3460 }
3461
3462 int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv)
3463 {
3464         if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt())
3465                 return -EIO;
3466
3467         return 0;
3468 }
3469
3470 void i915_ggtt_enable_guc(struct drm_i915_private *i915)
3471 {
3472         GEM_BUG_ON(i915->ggtt.invalidate != gen6_ggtt_invalidate);
3473
3474         i915->ggtt.invalidate = guc_ggtt_invalidate;
3475
3476         i915_ggtt_invalidate(i915);
3477 }
3478
3479 void i915_ggtt_disable_guc(struct drm_i915_private *i915)
3480 {
3481         /* XXX Temporary pardon for error unload */
3482         if (i915->ggtt.invalidate == gen6_ggtt_invalidate)
3483                 return;
3484
3485         /* We should only be called after i915_ggtt_enable_guc() */
3486         GEM_BUG_ON(i915->ggtt.invalidate != guc_ggtt_invalidate);
3487
3488         i915->ggtt.invalidate = gen6_ggtt_invalidate;
3489
3490         i915_ggtt_invalidate(i915);
3491 }
3492
3493 void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
3494 {
3495         struct i915_ggtt *ggtt = &dev_priv->ggtt;
3496         struct i915_vma *vma, *vn;
3497
3498         i915_check_and_clear_faults(dev_priv);
3499
3500         mutex_lock(&ggtt->vm.mutex);
3501
3502         /* First fill our portion of the GTT with scratch pages */
3503         ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
3504         ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */
3505
3506         /* clflush objects bound into the GGTT and rebind them. */
3507         list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
3508                 struct drm_i915_gem_object *obj = vma->obj;
3509
3510                 if (!(vma->flags & I915_VMA_GLOBAL_BIND))
3511                         continue;
3512
3513                 mutex_unlock(&ggtt->vm.mutex);
3514
3515                 if (!i915_vma_unbind(vma))
3516                         goto lock;
3517
3518                 WARN_ON(i915_vma_bind(vma,
3519                                       obj ? obj->cache_level : 0,
3520                                       PIN_UPDATE));
3521                 if (obj)
3522                         WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
3523
3524 lock:
3525                 mutex_lock(&ggtt->vm.mutex);
3526         }
3527
3528         ggtt->vm.closed = false;
3529         i915_ggtt_invalidate(dev_priv);
3530
3531         mutex_unlock(&ggtt->vm.mutex);
3532
3533         if (INTEL_GEN(dev_priv) >= 8) {
3534                 struct intel_ppat *ppat = &dev_priv->ppat;
3535
3536                 bitmap_set(ppat->dirty, 0, ppat->max_entries);
3537                 dev_priv->ppat.update_hw(dev_priv);
3538                 return;
3539         }
3540 }
3541
3542 static struct scatterlist *
3543 rotate_pages(struct drm_i915_gem_object *obj, unsigned int offset,
3544              unsigned int width, unsigned int height,
3545              unsigned int stride,
3546              struct sg_table *st, struct scatterlist *sg)
3547 {
3548         unsigned int column, row;
3549         unsigned int src_idx;
3550
3551         for (column = 0; column < width; column++) {
3552                 src_idx = stride * (height - 1) + column + offset;
3553                 for (row = 0; row < height; row++) {
3554                         st->nents++;
3555                         /* We don't need the pages, but need to initialize
3556                          * the entries so the sg list can be happily traversed.
3557                          * All we need are the DMA addresses.
3558                          */
3559                         sg_set_page(sg, NULL, I915_GTT_PAGE_SIZE, 0);
3560                         sg_dma_address(sg) =
3561                                 i915_gem_object_get_dma_address(obj, src_idx);
3562                         sg_dma_len(sg) = I915_GTT_PAGE_SIZE;
3563                         sg = sg_next(sg);
3564                         src_idx -= stride;
3565                 }
3566         }
3567
3568         return sg;
3569 }
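
/*
 * An illustrative walk-through (example values only): for a single plane
 * with width == height == stride == 2 and offset == 0, the object's pages
 *
 *	0 1
 *	2 3
 *
 * are emitted into the sg list in the order 2, 0, 3, 1, i.e. each source
 * column is walked bottom-up, which gives the rotated layout once the
 * resulting sg list is mapped linearly into the GGTT.
 */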
3570
3571 static noinline struct sg_table *
3572 intel_rotate_pages(struct intel_rotation_info *rot_info,
3573                    struct drm_i915_gem_object *obj)
3574 {
3575         unsigned int size = intel_rotation_info_size(rot_info);
3576         struct sg_table *st;
3577         struct scatterlist *sg;
3578         int ret = -ENOMEM;
3579         int i;
3580
3581         /* Allocate target SG list. */
3582         st = kmalloc(sizeof(*st), GFP_KERNEL);
3583         if (!st)
3584                 goto err_st_alloc;
3585
3586         ret = sg_alloc_table(st, size, GFP_KERNEL);
3587         if (ret)
3588                 goto err_sg_alloc;
3589
3590         st->nents = 0;
3591         sg = st->sgl;
3592
3593         for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) {
3594                 sg = rotate_pages(obj, rot_info->plane[i].offset,
3595                                   rot_info->plane[i].width, rot_info->plane[i].height,
3596                                   rot_info->plane[i].stride, st, sg);
3597         }
3598
3599         return st;
3600
3601 err_sg_alloc:
3602         kfree(st);
3603 err_st_alloc:
3604
3605         DRM_DEBUG_DRIVER("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
3606                          obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
3607
3608         return ERR_PTR(ret);
3609 }
3610
3611 static noinline struct sg_table *
3612 intel_partial_pages(const struct i915_ggtt_view *view,
3613                     struct drm_i915_gem_object *obj)
3614 {
3615         struct sg_table *st;
3616         struct scatterlist *sg, *iter;
3617         unsigned int count = view->partial.size;
3618         unsigned int offset;
3619         int ret = -ENOMEM;
3620
3621         st = kmalloc(sizeof(*st), GFP_KERNEL);
3622         if (!st)
3623                 goto err_st_alloc;
3624
3625         ret = sg_alloc_table(st, count, GFP_KERNEL);
3626         if (ret)
3627                 goto err_sg_alloc;
3628
3629         iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset);
3630         GEM_BUG_ON(!iter);
3631
3632         sg = st->sgl;
3633         st->nents = 0;
3634         do {
3635                 unsigned int len;
3636
3637                 len = min(iter->length - (offset << PAGE_SHIFT),
3638                           count << PAGE_SHIFT);
3639                 sg_set_page(sg, NULL, len, 0);
3640                 sg_dma_address(sg) =
3641                         sg_dma_address(iter) + (offset << PAGE_SHIFT);
3642                 sg_dma_len(sg) = len;
3643
3644                 st->nents++;
3645                 count -= len >> PAGE_SHIFT;
3646                 if (count == 0) {
3647                         sg_mark_end(sg);
3648                         i915_sg_trim(st); /* Drop any unused tail entries. */
3649
3650                         return st;
3651                 }
3652
3653                 sg = __sg_next(sg);
3654                 iter = __sg_next(iter);
3655                 offset = 0;
3656         } while (1);
3657
3658 err_sg_alloc:
3659         kfree(st);
3660 err_st_alloc:
3661         return ERR_PTR(ret);
3662 }
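
/*
 * An illustrative example (values only): for view->partial.offset == 2 and
 * view->partial.size == 3 on an object whose backing store is one large
 * contiguous sg entry, the loop above emits a single sg entry whose DMA
 * address is the object's base address plus two pages and whose length is
 * three pages, then marks the end and trims the unused tail.
 */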
3663
3664 static int
3665 i915_get_ggtt_vma_pages(struct i915_vma *vma)
3666 {
3667         int ret;
3668
3669         /* The vma->pages are only valid within the lifespan of the borrowed
3670          * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so
3671          * must vma->pages be. A simple rule is that vma->pages must only
3672          * be accessed when the obj->mm.pages are pinned.
3673          */
3674         GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));
3675
3676         switch (vma->ggtt_view.type) {
3677         default:
3678                 GEM_BUG_ON(vma->ggtt_view.type);
3679                 /* fall through */
3680         case I915_GGTT_VIEW_NORMAL:
3681                 vma->pages = vma->obj->mm.pages;
3682                 return 0;
3683
3684         case I915_GGTT_VIEW_ROTATED:
3685                 vma->pages =
3686                         intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
3687                 break;
3688
3689         case I915_GGTT_VIEW_PARTIAL:
3690                 vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
3691                 break;
3692         }
3693
3694         ret = 0;
3695         if (IS_ERR(vma->pages)) {
3696                 ret = PTR_ERR(vma->pages);
3697                 vma->pages = NULL;
3698                 DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
3699                           vma->ggtt_view.type, ret);
3700         }
3701         return ret;
3702 }
3703
3704 /**
3705  * i915_gem_gtt_reserve - reserve a node in an address_space (GTT)
3706  * @vm: the &struct i915_address_space
3707  * @node: the &struct drm_mm_node (typically i915_vma.node)
3708  * @size: how much space to allocate inside the GTT,
3709  *        must be #I915_GTT_PAGE_SIZE aligned
3710  * @offset: where to insert inside the GTT,
3711  *          must be #I915_GTT_MIN_ALIGNMENT aligned, and the node
3712  *          (@offset + @size) must fit within the address space
3713  * @color: color to apply to node, if this node is not from a VMA,
3714  *         color must be #I915_COLOR_UNEVICTABLE
3715  * @flags: control search and eviction behaviour
3716  *
3717  * i915_gem_gtt_reserve() tries to insert the @node at the exact @offset inside
3718  * the address space (using @size and @color). If the @node does not fit, it
3719  * tries to evict any overlapping nodes from the GTT, including any
3720  * neighbouring nodes if the colors do not match (to ensure guard pages between
3721  * differing domains). See i915_gem_evict_for_node() for the gory details
3722  * on the eviction algorithm. #PIN_NONBLOCK may be used to prevent waiting on
3723  * evicting active overlapping objects, and any overlapping node that is pinned
3724  * or marked as unevictable will also result in failure.
3725  *
3726  * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if
3727  * asked to wait for eviction and interrupted.
3728  */
3729 int i915_gem_gtt_reserve(struct i915_address_space *vm,
3730                          struct drm_mm_node *node,
3731                          u64 size, u64 offset, unsigned long color,
3732                          unsigned int flags)
3733 {
3734         int err;
3735
3736         GEM_BUG_ON(!size);
3737         GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
3738         GEM_BUG_ON(!IS_ALIGNED(offset, I915_GTT_MIN_ALIGNMENT));
3739         GEM_BUG_ON(range_overflows(offset, size, vm->total));
3740         GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm);
3741         GEM_BUG_ON(drm_mm_node_allocated(node));
3742
3743         node->size = size;
3744         node->start = offset;
3745         node->color = color;
3746
3747         err = drm_mm_reserve_node(&vm->mm, node);
3748         if (err != -ENOSPC)
3749                 return err;
3750
3751         if (flags & PIN_NOEVICT)
3752                 return -ENOSPC;
3753
3754         err = i915_gem_evict_for_node(vm, node, flags);
3755         if (err == 0)
3756                 err = drm_mm_reserve_node(&vm->mm, node);
3757
3758         return err;
3759 }
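
/*
 * A minimal caller sketch (hypothetical values, not taken from the driver):
 * reserve a single page at a fixed offset without allowing eviction. The
 * color is I915_COLOR_UNEVICTABLE because this node is not backed by a VMA.
 *
 *	struct drm_mm_node node = {};
 *	int err;
 *
 *	err = i915_gem_gtt_reserve(&ggtt->vm, &node, I915_GTT_PAGE_SIZE, 0,
 *				   I915_COLOR_UNEVICTABLE, PIN_NOEVICT);
 *	if (err)
 *		return err;
 */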
3760
3761 static u64 random_offset(u64 start, u64 end, u64 len, u64 align)
3762 {
3763         u64 range, addr;
3764
3765         GEM_BUG_ON(range_overflows(start, len, end));
3766         GEM_BUG_ON(round_up(start, align) > round_down(end - len, align));
3767
3768         range = round_down(end - len, align) - round_up(start, align);
3769         if (range) {
3770                 if (sizeof(unsigned long) == sizeof(u64)) {
3771                         addr = get_random_long();
3772                 } else {
3773                         addr = get_random_int();
3774                         if (range > U32_MAX) {
3775                                 addr <<= 32;
3776                                 addr |= get_random_int();
3777                         }
3778                 }
3779                 div64_u64_rem(addr, range, &addr);
3780                 start += addr;
3781         }
3782
3783         return round_up(start, align);
3784 }
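
/*
 * A worked example (illustrative numbers only): with start == 0,
 * end == 1MiB, len == 64KiB and align == 4KiB, range is 960KiB, so the
 * returned offset is a 4KiB-aligned value in [0, 960KiB] and the candidate
 * window [offset, offset + len) always stays within the 1MiB limit.
 */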
3785
3786 /**
3787  * i915_gem_gtt_insert - insert a node into an address_space (GTT)
3788  * @vm: the &struct i915_address_space
3789  * @node: the &struct drm_mm_node (typically i915_vma.node)
3790  * @size: how much space to allocate inside the GTT,
3791  *        must be #I915_GTT_PAGE_SIZE aligned
3792  * @alignment: required alignment of starting offset, may be 0 but
3793  *             if specified, this must be a power-of-two and at least
3794  *             #I915_GTT_MIN_ALIGNMENT
3795  * @color: color to apply to node
3796  * @start: start of any range restriction inside GTT (0 for all),
3797  *         must be #I915_GTT_PAGE_SIZE aligned
3798  * @end: end of any range restriction inside GTT (U64_MAX for all),
3799  *       must be #I915_GTT_PAGE_SIZE aligned if not U64_MAX
3800  * @flags: control search and eviction behaviour
3801  *
3802  * i915_gem_gtt_insert() first searches for an available hole into which
3803  * it can insert the node. The hole address is aligned to @alignment and
3804  * its @size must then fit entirely within the [@start, @end] bounds. The
3805  * nodes on either side of the hole must match @color, or else a guard page
3806  * will be inserted between the two nodes (or the node evicted). If no
3807  * suitable hole is found, a victim is first selected at random and tested
3808  * for eviction; failing that, the LRU list of objects within the GTT
3809  * is scanned to find the first set of replacement nodes to create the hole.
3810  * Those old overlapping nodes are evicted from the GTT (and so must be
3811  * rebound before any future use). Any node that is currently pinned cannot
3812  * be evicted (see i915_vma_pin()). Similarly, if the node's VMA is currently
3813  * active and #PIN_NONBLOCK is specified, that node is also skipped when
3814  * searching for an eviction candidate. See i915_gem_evict_something() for
3815  * the gory details on the eviction algorithm.
3816  *
3817  * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if
3818  * asked to wait for eviction and interrupted.
3819  */
3820 int i915_gem_gtt_insert(struct i915_address_space *vm,
3821                         struct drm_mm_node *node,
3822                         u64 size, u64 alignment, unsigned long color,
3823                         u64 start, u64 end, unsigned int flags)
3824 {
3825         enum drm_mm_insert_mode mode;
3826         u64 offset;
3827         int err;
3828
3829         lockdep_assert_held(&vm->i915->drm.struct_mutex);
3830         GEM_BUG_ON(!size);
3831         GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
3832         GEM_BUG_ON(alignment && !is_power_of_2(alignment));
3833         GEM_BUG_ON(alignment && !IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT));
3834         GEM_BUG_ON(start >= end);
3835         GEM_BUG_ON(start > 0  && !IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
3836         GEM_BUG_ON(end < U64_MAX && !IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
3837         GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm);
3838         GEM_BUG_ON(drm_mm_node_allocated(node));
3839
3840         if (unlikely(range_overflows(start, size, end)))
3841                 return -ENOSPC;
3842
3843         if (unlikely(round_up(start, alignment) > round_down(end - size, alignment)))
3844                 return -ENOSPC;
3845
3846         mode = DRM_MM_INSERT_BEST;
3847         if (flags & PIN_HIGH)
3848                 mode = DRM_MM_INSERT_HIGHEST;
3849         if (flags & PIN_MAPPABLE)
3850                 mode = DRM_MM_INSERT_LOW;
3851
3852         /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
3853          * so we know that we always have a minimum alignment of 4096.
3854          * The drm_mm range manager is optimised to return results
3855          * with zero alignment, so where possible use the optimal
3856          * path.
3857          */
3858         BUILD_BUG_ON(I915_GTT_MIN_ALIGNMENT > I915_GTT_PAGE_SIZE);
3859         if (alignment <= I915_GTT_MIN_ALIGNMENT)
3860                 alignment = 0;
3861
3862         err = drm_mm_insert_node_in_range(&vm->mm, node,
3863                                           size, alignment, color,
3864                                           start, end, mode);
3865         if (err != -ENOSPC)
3866                 return err;
3867
3868         if (mode & DRM_MM_INSERT_ONCE) {
3869                 err = drm_mm_insert_node_in_range(&vm->mm, node,
3870                                                   size, alignment, color,
3871                                                   start, end,
3872                                                   DRM_MM_INSERT_BEST);
3873                 if (err != -ENOSPC)
3874                         return err;
3875         }
3876
3877         if (flags & PIN_NOEVICT)
3878                 return -ENOSPC;
3879
3880         /* No free space, pick a slot at random.
3881          *
3882          * There is a pathological case here using a GTT shared between
3883          * mmap and GPU (i.e. ggtt/aliasing_ppgtt but not full-ppgtt):
3884          *
3885          *    |<-- 256 MiB aperture -->||<-- 1792 MiB unmappable -->|
3886          *         (64k objects)             (448k objects)
3887          *
3888          * Now imagine that the eviction LRU is ordered top-down (just because
3889          * pathology meets real life), and that we need to evict an object to
3890          * make room inside the aperture. The eviction scan then has to walk
3891          * the 448k list before it finds one within range. And now imagine that
3892          * it has to search for a new hole between every byte inside the memcpy,
3893          * for several simultaneous clients.
3894          *
3895          * On a full-ppgtt system, if we have run out of available space, there
3896          * will be lots and lots of objects in the eviction list! Again,
3897          * searching that LRU list may be slow if we are also applying any
3898          * range restrictions (e.g. restriction to low 4GiB) and so, for
3899  * simplicity and similarity between different GTTs, try the single
3900          * random replacement first.
3901          */
3902         offset = random_offset(start, end,
3903                                size, alignment ?: I915_GTT_MIN_ALIGNMENT);
3904         err = i915_gem_gtt_reserve(vm, node, size, offset, color, flags);
3905         if (err != -ENOSPC)
3906                 return err;
3907
3908         /* Randomly selected placement is pinned, do a search */
3909         err = i915_gem_evict_something(vm, size, alignment, color,
3910                                        start, end, flags);
3911         if (err)
3912                 return err;
3913
3914         return drm_mm_insert_node_in_range(&vm->mm, node,
3915                                            size, alignment, color,
3916                                            start, end, DRM_MM_INSERT_EVICT);
3917 }
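
/*
 * A minimal caller sketch (hypothetical values, not taken from the driver):
 * with vm->i915->drm.struct_mutex held, ask for 64KiB anywhere in the low
 * 4GiB of the GTT, preferring low addresses for mappable access, and fall
 * back to eviction only if no suitable hole is free.
 *
 *	struct drm_mm_node node = {};
 *	int err;
 *
 *	err = i915_gem_gtt_insert(&ggtt->vm, &node,
 *				  SZ_64K, I915_GTT_MIN_ALIGNMENT,
 *				  I915_COLOR_UNEVICTABLE,
 *				  0, SZ_4G, PIN_MAPPABLE);
 *	if (err)
 *		return err;
 */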
3918
3919 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
3920 #include "selftests/mock_gtt.c"
3921 #include "selftests/i915_gem_gtt.c"
3922 #endif