From 1d1b5490b91c932a75188e9acf76ead68d6e9741 Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Tue, 4 Jun 2019 16:38:30 +0100
Subject: [PATCH] drm/i915/gtt: Replace struct_mutex serialisation for allocation

Instead of relying on the caller holding struct_mutex across the
allocation, push the allocation under a tree of spinlocks stored inside
the page tables. Not only should this allow us to avoid struct_mutex
here, but it will allow multiple users to lock independent ranges for
concurrent allocations, and operate independently. This is vital for
pushing the GTT manipulation into a background thread where dependency
on struct_mutex is verboten, and for allowing other callers to avoid
struct_mutex altogether.

v2: Restore lost GEM_BUG_ON for removing too many PTE from
gen6_ppgtt_clear_range.

Signed-off-by: Chris Wilson
Cc: Matthew Auld
Cc: Mika Kuoppala
Reviewed-by: Joonas Lahtinen
Acked-by: Mika Kuoppala
Link: https://patchwork.freedesktop.org/patch/msgid/20190604153830.19096-1-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 213 +++++++++++++++++++---------
 drivers/gpu/drm/i915/i915_gem_gtt.h |   9 +-
 2 files changed, 153 insertions(+), 69 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index d415438d4815..56a436858043 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -659,7 +659,7 @@ static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
 		return ERR_PTR(-ENOMEM);
 	}
 
-	pt->used_ptes = 0;
+	atomic_set(&pt->used_ptes, 0);
 	return pt;
 }
 
@@ -694,7 +694,8 @@ static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
 		return ERR_PTR(-ENOMEM);
 	}
 
-	pd->used_pdes = 0;
+	atomic_set(&pd->used_pdes, 0);
+	spin_lock_init(&pd->lock);
 	return pd;
 }
 
@@ -725,6 +726,8 @@ static int __pdp_init(struct i915_address_space *vm,
 
 	memset_p((void **)pdp->page_directory, vm->scratch_pd, pdpes);
 
+	atomic_set(&pdp->used_pdpes, 0);
+	spin_lock_init(&pdp->lock);
 	return 0;
 }
 
@@ -779,11 +782,8 @@ static void free_pdp(struct i915_address_space *vm,
 static void gen8_initialize_pdp(struct i915_address_space *vm,
 				struct i915_page_directory_pointer *pdp)
 {
-	gen8_ppgtt_pdpe_t scratch_pdpe;
-
-	scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
-
-	fill_px(vm, pdp, scratch_pdpe);
+	fill_px(vm, pdp,
+		gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC));
 }
 
 static void gen8_initialize_pml4(struct i915_address_space *vm,
@@ -792,6 +792,7 @@ static void gen8_initialize_pml4(struct i915_address_space *vm,
 	fill_px(vm, pml4,
 		gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC));
 	memset_p((void **)pml4->pdps, vm->scratch_pdp, GEN8_PML4ES_PER_PML4);
+	spin_lock_init(&pml4->lock);
 }
 
 /*
@@ -815,17 +816,12 @@ static bool gen8_ppgtt_clear_pt(const struct i915_address_space *vm,
 	unsigned int num_entries = gen8_pte_count(start, length);
 	gen8_pte_t *vaddr;
 
-	GEM_BUG_ON(num_entries > pt->used_ptes);
-
-	pt->used_ptes -= num_entries;
-	if (!pt->used_ptes)
-		return true;
-
 	vaddr = kmap_atomic_px(pt);
 	memset64(vaddr + gen8_pte_index(start), vm->scratch_pte, num_entries);
 	kunmap_atomic(vaddr);
 
-	return false;
+	GEM_BUG_ON(num_entries > atomic_read(&pt->used_ptes));
+	return !atomic_sub_return(num_entries, &pt->used_ptes);
 }
 
 static void gen8_ppgtt_set_pde(struct i915_address_space *vm,
@@ -835,8 +831,6 @@
 {
 	gen8_pde_t *vaddr;
 
-	pd->page_table[pde] = pt;
-
 	vaddr = kmap_atomic_px(pd);
 	vaddr[pde] = gen8_pde_encode(px_dma(pt), I915_CACHE_LLC);
 	kunmap_atomic(vaddr);
@@ -850,19 +844,28 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
 	u32 pde;
 
 	gen8_for_each_pde(pt, pd, start, length, pde) {
+		bool free = false;
+
 		GEM_BUG_ON(pt == vm->scratch_pt);
 
 		if (!gen8_ppgtt_clear_pt(vm, pt, start, length))
 			continue;
 
-		gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde);
-		GEM_BUG_ON(!pd->used_pdes);
-		pd->used_pdes--;
+		spin_lock(&pd->lock);
+		if (!atomic_read(&pt->used_ptes)) {
+			gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde);
+			pd->page_table[pde] = vm->scratch_pt;
 
-		free_pt(vm, pt);
+			GEM_BUG_ON(!atomic_read(&pd->used_pdes));
+			atomic_dec(&pd->used_pdes);
+			free = true;
+		}
+		spin_unlock(&pd->lock);
+		if (free)
+			free_pt(vm, pt);
 	}
 
-	return !pd->used_pdes;
+	return !atomic_read(&pd->used_pdes);
 }
 
 static void gen8_ppgtt_set_pdpe(struct i915_address_space *vm,
@@ -872,7 +875,6 @@
 {
 	gen8_ppgtt_pdpe_t *vaddr;
 
-	pdp->page_directory[pdpe] = pd;
 	if (!i915_vm_is_4lvl(vm))
 		return;
 
@@ -892,19 +894,28 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
 	unsigned int pdpe;
 
 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
+		bool free = false;
+
 		GEM_BUG_ON(pd == vm->scratch_pd);
 
 		if (!gen8_ppgtt_clear_pd(vm, pd, start, length))
 			continue;
 
-		gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
-		GEM_BUG_ON(!pdp->used_pdpes);
-		pdp->used_pdpes--;
+		spin_lock(&pdp->lock);
+		if (!atomic_read(&pd->used_pdes)) {
+			gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
+			pdp->page_directory[pdpe] = vm->scratch_pd;
 
-		free_pd(vm, pd);
+			GEM_BUG_ON(!atomic_read(&pdp->used_pdpes));
+			atomic_dec(&pdp->used_pdpes);
+			free = true;
+		}
+		spin_unlock(&pdp->lock);
+		if (free)
+			free_pd(vm, pd);
 	}
 
-	return !pdp->used_pdpes;
+	return !atomic_read(&pdp->used_pdpes);
 }
 
 static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm,
@@ -919,8 +930,6 @@ static void gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4,
 {
 	gen8_ppgtt_pml4e_t *vaddr;
 
-	pml4->pdps[pml4e] = pdp;
-
 	vaddr = kmap_atomic_px(pml4);
 	vaddr[pml4e] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
 	kunmap_atomic(vaddr);
@@ -941,14 +950,21 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
 	GEM_BUG_ON(!i915_vm_is_4lvl(vm));
 
 	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
+		bool free = false;
 		GEM_BUG_ON(pdp == vm->scratch_pdp);
 
 		if (!gen8_ppgtt_clear_pdp(vm, pdp, start, length))
 			continue;
 
-		gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
-
-		free_pdp(vm, pdp);
+		spin_lock(&pml4->lock);
+		if (!atomic_read(&pdp->used_pdpes)) {
+			gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
+			pml4->pdps[pml4e] = vm->scratch_pdp;
+			free = true;
+		}
+		spin_unlock(&pml4->lock);
+		if (free)
+			free_pdp(vm, pdp);
 	}
 }
 
@@ -1373,27 +1389,38 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
 	u64 from = start;
 	unsigned int pde;
 
+	spin_lock(&pd->lock);
 	gen8_for_each_pde(pt, pd, start, length, pde) {
-		int count = gen8_pte_count(start, length);
+		const int count = gen8_pte_count(start, length);
 
 		if (pt == vm->scratch_pt) {
-			pd->used_pdes++;
+			struct i915_page_table *old;
+
+			spin_unlock(&pd->lock);
 
 			pt = alloc_pt(vm);
-			if (IS_ERR(pt)) {
-				pd->used_pdes--;
+			if (IS_ERR(pt))
 				goto unwind;
-			}
 
 			if (count < GEN8_PTES || intel_vgpu_active(vm->i915))
 				gen8_initialize_pt(vm, pt);
 
-			gen8_ppgtt_set_pde(vm, pd, pt, pde);
-			GEM_BUG_ON(pd->used_pdes > I915_PDES);
+			old = cmpxchg(&pd->page_table[pde], vm->scratch_pt, pt);
+			if (old == vm->scratch_pt) {
+				gen8_ppgtt_set_pde(vm, pd, pt, pde);
+				atomic_inc(&pd->used_pdes);
+			} else {
+				free_pt(vm, pt);
+				pt = old;
+			}
+
+			spin_lock(&pd->lock);
 		}
 
-		pt->used_ptes += count;
+		atomic_add(count, &pt->used_ptes);
 	}
+	spin_unlock(&pd->lock);
+
 	return 0;
 
 unwind:
@@ -1410,35 +1437,54 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
 	unsigned int pdpe;
 	int ret;
 
+	spin_lock(&pdp->lock);
 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
 		if (pd == vm->scratch_pd) {
-			pdp->used_pdpes++;
+			struct i915_page_directory *old;
+
+			spin_unlock(&pdp->lock);
 
 			pd = alloc_pd(vm);
-			if (IS_ERR(pd)) {
-				pdp->used_pdpes--;
+			if (IS_ERR(pd))
 				goto unwind;
-			}
 
 			gen8_initialize_pd(vm, pd);
-			gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
-			GEM_BUG_ON(pdp->used_pdpes > i915_pdpes_per_pdp(vm));
+
+			old = cmpxchg(&pdp->page_directory[pdpe],
+				      vm->scratch_pd, pd);
+			if (old == vm->scratch_pd) {
+				gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
+				atomic_inc(&pdp->used_pdpes);
+			} else {
+				free_pd(vm, pd);
+				pd = old;
+			}
+
+			spin_lock(&pdp->lock);
 		}
+		atomic_inc(&pd->used_pdes);
+		spin_unlock(&pdp->lock);
 
 		ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
 		if (unlikely(ret))
 			goto unwind_pd;
+
+		spin_lock(&pdp->lock);
+		atomic_dec(&pd->used_pdes);
 	}
+	spin_unlock(&pdp->lock);
 
 	return 0;
 
 unwind_pd:
-	if (!pd->used_pdes) {
+	spin_lock(&pdp->lock);
+	if (atomic_dec_and_test(&pd->used_pdes)) {
 		gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
-		GEM_BUG_ON(!pdp->used_pdpes);
-		pdp->used_pdpes--;
+		GEM_BUG_ON(!atomic_read(&pdp->used_pdpes));
+		atomic_dec(&pdp->used_pdpes);
 		free_pd(vm, pd);
 	}
+	spin_unlock(&pdp->lock);
 unwind:
 	gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
 	return -ENOMEM;
@@ -1461,28 +1507,50 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
 	u32 pml4e;
 	int ret;
 
+	spin_lock(&pml4->lock);
 	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
-		if (pml4->pdps[pml4e] == vm->scratch_pdp) {
+		if (pdp == vm->scratch_pdp) {
+			struct i915_page_directory_pointer *old;
+
+			spin_unlock(&pml4->lock);
+
 			pdp = alloc_pdp(vm);
 			if (IS_ERR(pdp))
 				goto unwind;
 
 			gen8_initialize_pdp(vm, pdp);
-			gen8_ppgtt_set_pml4e(pml4, pdp, pml4e);
+
+			old = cmpxchg(&pml4->pdps[pml4e], vm->scratch_pdp, pdp);
+			if (old == vm->scratch_pdp) {
+				gen8_ppgtt_set_pml4e(pml4, pdp, pml4e);
+			} else {
+				free_pdp(vm, pdp);
+				pdp = old;
+			}
+
+			spin_lock(&pml4->lock);
 		}
+		atomic_inc(&pdp->used_pdpes);
+		spin_unlock(&pml4->lock);
 
 		ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length);
 		if (unlikely(ret))
 			goto unwind_pdp;
+
+		spin_lock(&pml4->lock);
+		atomic_dec(&pdp->used_pdpes);
 	}
+	spin_unlock(&pml4->lock);
 
 	return 0;
 
 unwind_pdp:
-	if (!pdp->used_pdpes) {
+	spin_lock(&pml4->lock);
+	if (atomic_dec_and_test(&pdp->used_pdpes)) {
 		gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
 		free_pdp(vm, pdp);
 	}
+	spin_unlock(&pml4->lock);
 unwind:
 	gen8_ppgtt_clear_4lvl(vm, from, start - from);
 	return -ENOMEM;
@@ -1504,10 +1572,10 @@ static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt)
 
 		gen8_initialize_pd(vm, pd);
 		gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
-		pdp->used_pdpes++;
+		atomic_inc(&pdp->used_pdpes);
 	}
 
-	pdp->used_pdpes++; /* never remove */
+	atomic_inc(&pdp->used_pdpes); /* never remove */
 	return 0;
 
 unwind:
@@ -1516,7 +1584,7 @@ static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt)
 		gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
 		free_pd(vm, pd);
 	}
-	pdp->used_pdpes = 0;
+	atomic_set(&pdp->used_pdpes, 0);
 	return -ENOMEM;
 }
 
@@ -1688,9 +1756,8 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 
 		num_entries -= count;
 
-		GEM_BUG_ON(count > pt->used_ptes);
-		pt->used_ptes -= count;
-		if (!pt->used_ptes)
+		GEM_BUG_ON(count > atomic_read(&pt->used_ptes));
+		if (!atomic_sub_return(count, &pt->used_ptes))
 			ppgtt->scan_for_unused_pt = true;
 
 		/*
@@ -1760,28 +1827,41 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
 
 	wakeref = intel_runtime_pm_get(vm->i915);
 
+	spin_lock(&ppgtt->base.pd.lock);
 	gen6_for_each_pde(pt, &ppgtt->base.pd, start, length, pde) {
 		const unsigned int count = gen6_pte_count(start, length);
 
 		if (pt == vm->scratch_pt) {
+			struct i915_page_table *old;
+
+			spin_unlock(&ppgtt->base.pd.lock);
+
 			pt = alloc_pt(vm);
 			if (IS_ERR(pt))
 				goto unwind_out;
 
 			gen6_initialize_pt(vm, pt);
-			ppgtt->base.pd.page_table[pde] = pt;
 
-			if (i915_vma_is_bound(ppgtt->vma,
-					      I915_VMA_GLOBAL_BIND)) {
-				gen6_write_pde(ppgtt, pde, pt);
-				flush = true;
+			old = cmpxchg(&ppgtt->base.pd.page_table[pde],
+				      vm->scratch_pt, pt);
+			if (old == vm->scratch_pt) {
+				ppgtt->base.pd.page_table[pde] = pt;
+				if (i915_vma_is_bound(ppgtt->vma,
+						      I915_VMA_GLOBAL_BIND)) {
+					gen6_write_pde(ppgtt, pde, pt);
+					flush = true;
+				}
+			} else {
+				free_pt(vm, pt);
+				pt = old;
 			}
 
-			GEM_BUG_ON(pt->used_ptes);
+			spin_lock(&ppgtt->base.pd.lock);
 		}
 
-		pt->used_ptes += count;
+		atomic_add(count, &pt->used_ptes);
 	}
+	spin_unlock(&ppgtt->base.pd.lock);
 
 	if (flush) {
 		mark_tlbs_dirty(&ppgtt->base);
@@ -1822,6 +1902,7 @@ static int gen6_ppgtt_init_scratch(struct gen6_hw_ppgtt *ppgtt)
 	gen6_initialize_pt(vm, vm->scratch_pt);
 	gen6_for_all_pdes(unused, &ppgtt->base.pd, pde)
 		ppgtt->base.pd.page_table[pde] = vm->scratch_pt;
+	spin_lock_init(&ppgtt->base.pd.lock);
 
 	return 0;
 }
@@ -1950,7 +2031,7 @@ static void pd_vma_unbind(struct i915_vma *vma)
 
 	/* Free all no longer used page tables */
 	gen6_for_all_pdes(pt, &ppgtt->base.pd, pde) {
-		if (pt->used_ptes || pt == scratch_pt)
+		if (atomic_read(&pt->used_ptes) || pt == scratch_pt)
 			continue;
 
 		free_pt(&ppgtt->base.vm, pt);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 73b6608740f2..152a03560c22 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -248,25 +248,28 @@ struct i915_page_dma {
 
 struct i915_page_table {
 	struct i915_page_dma base;
-	unsigned int used_ptes;
+	atomic_t used_ptes;
 };
 
 struct i915_page_directory {
 	struct i915_page_dma base;
 	struct i915_page_table *page_table[I915_PDES]; /* PDEs */
-	unsigned int used_pdes;
+	atomic_t used_pdes;
+	spinlock_t lock;
 };
 
 struct i915_page_directory_pointer {
 	struct i915_page_dma base;
 	struct i915_page_directory **page_directory;
-	unsigned int used_pdpes;
+	atomic_t used_pdpes;
+	spinlock_t lock;
 };
 
 struct i915_pml4 {
 	struct i915_page_dma base;
 	struct i915_page_directory_pointer *pdps[GEN8_PML4ES_PER_PML4];
+	spinlock_t lock;
 };
 
 struct i915_vma_ops {
-- 
2.45.2
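
Not part of the patch itself: below is a minimal userspace C sketch of the locking pattern the patch
introduces in gen8_ppgtt_alloc_pd(), gen8_ppgtt_alloc_pdp() and gen6_alloc_va_range() -- allocate
optimistically outside the per-directory spinlock, publish the new page table with a compare-and-swap
against the shared scratch entry, free the local copy on losing the race, and account usage with an
atomic counter. It uses C11 atomics and a pthread spinlock in place of the kernel's cmpxchg(),
atomic_t and spinlock_t; all names (demo_pd, demo_pt, get_pt, NENTRIES) are hypothetical and purely
illustrative.

/*
 * Illustrative userspace sketch only -- not kernel code and not part of
 * the patch.  It mimics the shape of gen8_ppgtt_alloc_pd() above: drop the
 * per-directory spinlock around the allocation, install the new table with
 * a compare-and-swap against the shared scratch entry, free the local copy
 * if another thread won the race, and track usage with an atomic counter.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

#define NENTRIES 512

struct demo_pt {
	atomic_int used;			/* cf. i915_page_table.used_ptes */
};

struct demo_pd {
	_Atomic(struct demo_pt *) entry[NENTRIES]; /* cf. pd->page_table[] */
	pthread_spinlock_t lock;		/* cf. pd->lock */
};

static struct demo_pt scratch;			/* cf. vm->scratch_pt */

static struct demo_pt *get_pt(struct demo_pd *pd, unsigned int idx, int count)
{
	struct demo_pt *pt;

	pthread_spin_lock(&pd->lock);
	pt = pd->entry[idx];
	if (pt == &scratch) {
		struct demo_pt *old = &scratch;

		/* The allocation may block, so do it outside the lock. */
		pthread_spin_unlock(&pd->lock);

		pt = calloc(1, sizeof(*pt));
		if (!pt)
			return NULL;

		/* Install unless a concurrent caller beat us to this slot. */
		if (!atomic_compare_exchange_strong(&pd->entry[idx], &old, pt)) {
			free(pt);		/* lost the race */
			pt = old;		/* use the winner's table */
		}

		pthread_spin_lock(&pd->lock);
	}
	/* Bump the use count under the lock, as the patch does for used_ptes. */
	atomic_fetch_add(&pt->used, count);
	pthread_spin_unlock(&pd->lock);

	return pt;
}

int main(void)
{
	struct demo_pd pd;
	struct demo_pt *pt;
	unsigned int i;

	pthread_spin_init(&pd.lock, PTHREAD_PROCESS_PRIVATE);
	for (i = 0; i < NENTRIES; i++)
		pd.entry[i] = &scratch;

	pt = get_pt(&pd, 7, 16);
	if (pt)
		printf("entry 7 use count: %d\n", atomic_load(&pt->used));

	pthread_spin_destroy(&pd.lock);
	return 0;
}

The same shape repeats at each level of the gen8 page-table tree in the patch: the spinlock only
guards the pointer read and the usage-count update, never the allocation itself, which is what lets
independent ranges be populated concurrently without struct_mutex.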