drm/amdgpu: correctly sign extend 48bit addresses v3

[linux.git] / drivers / gpu / drm / amd / amdgpu / amdgpu_vm.h
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

index 67a15d439ac006f97b48ffe7e74544d6688bf452..e275ee7c1bc1f2d9cf9b50fbe3504537ac9952ab 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -29,6 +29,7 @@
  #include <linux/rbtree.h>
  #include <drm/gpu_scheduler.h>
  #include <drm/drm_file.h>
+#include <drm/ttm/ttm_bo_driver.h>
  
  #include "amdgpu_sync.h"
  #include "amdgpu_ring.h"
@@ -48,9 +49,6 @@ struct amdgpu_bo_list_entry;
  /* number of entries in page table */
  #define AMDGPU_VM_PTE_COUNT(adev) (1 << (adev)->vm_manager.block_size)
  
-/* PTBs (Page Table Blocks) need to be aligned to 32K */
-#define AMDGPU_VM_PTB_ALIGN_SIZE   32768
-
  #define AMDGPU_PTE_VALID       (1ULL << 0)
  #define AMDGPU_PTE_SYSTEM      (1ULL << 1)
  #define AMDGPU_PTE_SNOOPED     (1ULL << 2)
@@ -103,19 +101,6 @@ struct amdgpu_bo_list_entry;
  /* hardcode that limit for now */
  #define AMDGPU_VA_RESERVED_SIZE                        (1ULL << 20)
  
-/* VA hole for 48bit addresses on Vega10 */
-#define AMDGPU_VA_HOLE_START                   0x0000800000000000ULL
-#define AMDGPU_VA_HOLE_END                     0xffff800000000000ULL
-
-/*
- * Hardware is programmed as if the hole doesn't exists with start and end
- * address values.
- *
- * This mask is used to remove the upper 16bits of the VA and so come up with
- * the linear addr value.
- */
-#define AMDGPU_VA_HOLE_MASK                    0x0000ffffffffffffULL
-
  /* max vmids dedicated for process */
  #define AMDGPU_VM_MAX_RESERVED_VMID    1
  
@@ -160,6 +145,27 @@ struct amdgpu_vm_pt {
         struct amdgpu_vm_pt             *entries;
  };
  
+/* provided by hw blocks that can write ptes, e.g., sdma */
+struct amdgpu_vm_pte_funcs {
+       /* number of dw to reserve per operation */
+       unsigned        copy_pte_num_dw;
+
+       /* copy pte entries from GART */
+       void (*copy_pte)(struct amdgpu_ib *ib,
+                        uint64_t pe, uint64_t src,
+                        unsigned count);
+
+       /* write pte one entry at a time with addr mapping */
+       void (*write_pte)(struct amdgpu_ib *ib, uint64_t pe,
+                         uint64_t value, unsigned count,
+                         uint32_t incr);
+       /* for linear pte/pde updates without addr mapping */
+       void (*set_pte_pde)(struct amdgpu_ib *ib,
+                           uint64_t pe,
+                           uint64_t addr, unsigned count,
+                           uint32_t incr, uint64_t flags);
+};
+
  #define AMDGPU_VM_FAULT(pasid, addr) (((u64)(pasid) << 48) | (addr))
  #define AMDGPU_VM_FAULT_PASID(fault) ((u64)(fault) >> 48)
  #define AMDGPU_VM_FAULT_ADDR(fault)  ((u64)(fault) & 0xfffffffff000ULL)
@@ -182,13 +188,16 @@ struct amdgpu_vm {
         /* PT BOs which relocated and their parent need an update */
         struct list_head        relocated;
  
-       /* BOs moved, but not yet updated in the PT */
+       /* per VM BOs moved, but not yet updated in the PT */
         struct list_head        moved;
-       spinlock_t              moved_lock;
  
         /* All BOs of this VM not currently in the state machine */
         struct list_head        idle;
  
+       /* regular invalidated BOs, but not yet updated in the PT */
+       struct list_head        invalidated;
+       spinlock_t              invalidated_lock;
+
         /* BO mappings freed, but not yet updated in the PT */
         struct list_head        freed;
  
@@ -226,6 +235,11 @@ struct amdgpu_vm {
  
         /* Some basic info about the task */
         struct amdgpu_task_info task_info;
+
+       /* Store positions of a group of BOs */
+       struct ttm_lru_bulk_move lru_bulk_move;
+       /* mark whether the bulk move can be done */
+       bool                    bulk_moveable;
  };
  
  struct amdgpu_vm_manager {
@@ -244,10 +258,9 @@ struct amdgpu_vm_manager {
         /* vram base address for page table entry  */
         u64                                     vram_base_offset;
         /* vm pte handling */
-       const struct amdgpu_vm_pte_funcs        *vm_pte_funcs;
-       struct amdgpu_ring                      *vm_pte_rings[AMDGPU_MAX_RINGS];
-       unsigned                                vm_pte_num_rings;
-       atomic_t                                vm_pte_next_ring;
+       const struct amdgpu_vm_pte_funcs        *vm_pte_funcs;
+       struct drm_sched_rq                     *vm_pte_rqs[AMDGPU_MAX_RINGS];
+       unsigned                                vm_pte_num_rqs;
  
         /* partial resident texture handling */
         spinlock_t                              prt_lock;
@@ -266,11 +279,16 @@ struct amdgpu_vm_manager {
         spinlock_t                              pasid_lock;
  };
  
+#define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
+#define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
+#define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
+
  void amdgpu_vm_manager_init(struct amdgpu_device *adev);
  void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
  int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                    int vm_context, unsigned int pasid);
-int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned int pasid);
+void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
  void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
  bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
                                   unsigned int pasid);
@@ -321,7 +339,7 @@ struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,
  void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket);
  void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
                       struct amdgpu_bo_va *bo_va);
-void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size,
+void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
                            uint32_t fragment_size_default, unsigned max_level,
                            unsigned max_bits);
  int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
@@ -330,8 +348,11 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
  void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
  
  void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid,
-                        struct amdgpu_task_info *task_info);
+                            struct amdgpu_task_info *task_info);
  
  void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
  
+void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
+                               struct amdgpu_vm *vm);
+
  #endif