asedeno.scripts.mit.edu Git - linux.git/blobdiff - drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
drm/amdkfd: Remove deprecated get_vmem_size
[linux.git] / drivers / gpu / drm / amd / amdgpu / amdgpu_amdkfd_gfx_v8.c
index 056929b8ccd04e29403ecb401b335aa1ea8d754a..72ff6463594da02550db3d1ae699df85fc479805 100644 (file)
@@ -45,7 +45,7 @@ enum hqd_dequeue_request_type {
        RESET_WAVES
 };
 
-struct cik_sdma_rlc_registers;
+struct vi_sdma_mqd;
 
 /*
  * Register access functions
@@ -64,7 +64,14 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
                        uint32_t queue_id, uint32_t __user *wptr,
                        uint32_t wptr_shift, uint32_t wptr_mask,
                        struct mm_struct *mm);
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
+static int kgd_hqd_dump(struct kgd_dev *kgd,
+                       uint32_t pipe_id, uint32_t queue_id,
+                       uint32_t (**dump)[2], uint32_t *n_regs);
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+                            uint32_t __user *wptr, struct mm_struct *mm);
+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+                            uint32_t engine_id, uint32_t queue_id,
+                            uint32_t (**dump)[2], uint32_t *n_regs);
 static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
                uint32_t pipe_id, uint32_t queue_id);
 static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
@@ -125,7 +132,7 @@ static int get_tile_config(struct kgd_dev *kgd,
 static const struct kfd2kgd_calls kfd2kgd = {
        .init_gtt_mem_allocation = alloc_gtt_mem,
        .free_gtt_mem = free_gtt_mem,
-       .get_vmem_size = get_vmem_size,
+       .get_local_mem_info = get_local_mem_info,
        .get_gpu_clock_counter = get_gpu_clock_counter,
        .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
        .alloc_pasid = amdgpu_vm_alloc_pasid,
@@ -136,6 +143,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
        .init_interrupts = kgd_init_interrupts,
        .hqd_load = kgd_hqd_load,
        .hqd_sdma_load = kgd_hqd_sdma_load,
+       .hqd_dump = kgd_hqd_dump,
+       .hqd_sdma_dump = kgd_hqd_sdma_dump,
        .hqd_is_occupied = kgd_hqd_is_occupied,
        .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
        .hqd_destroy = kgd_hqd_destroy,
@@ -152,6 +161,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
        .get_fw_version = get_fw_version,
        .set_scratch_backing_va = set_scratch_backing_va,
        .get_tile_config = get_tile_config,
+       .get_cu_info = get_cu_info
 };
 
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
@@ -268,9 +278,15 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
        return 0;
 }
 
-static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
+static inline uint32_t get_sdma_base_addr(struct vi_sdma_mqd *m)
 {
-       return 0;
+       uint32_t retval;
+
+       retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
+               m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
+       pr_debug("kfd: sdma base address: 0x%x\n", retval);
+
+       return retval;
 }
 
 static inline struct vi_mqd *get_mqd(void *mqd)
@@ -278,9 +294,9 @@ static inline struct vi_mqd *get_mqd(void *mqd)
        return (struct vi_mqd *)mqd;
 }
 
-static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
+static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd)
 {
-       return (struct cik_sdma_rlc_registers *)mqd;
+       return (struct vi_sdma_mqd *)mqd;
 }
 
 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
@@ -358,8 +374,138 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
        return 0;
 }
 
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
+static int kgd_hqd_dump(struct kgd_dev *kgd,
+                       uint32_t pipe_id, uint32_t queue_id,
+                       uint32_t (**dump)[2], uint32_t *n_regs)
 {
+       struct amdgpu_device *adev = get_amdgpu_device(kgd);
+       uint32_t i = 0, reg;
+#define HQD_N_REGS (54+4)
+#define DUMP_REG(addr) do {                            \
+               if (WARN_ON_ONCE(i >= HQD_N_REGS))      \
+                       break;                          \
+               (*dump)[i][0] = (addr) << 2;            \
+               (*dump)[i++][1] = RREG32(addr);         \
+       } while (0)
+
+       *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+       if (*dump == NULL)
+               return -ENOMEM;
+
+       acquire_queue(kgd, pipe_id, queue_id);
+
+       DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0);
+       DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1);
+       DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2);
+       DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3);
+
+       for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_DONES; reg++)
+               DUMP_REG(reg);
+
+       release_queue(kgd);
+
+       WARN_ON_ONCE(i != HQD_N_REGS);
+       *n_regs = i;
+
+       return 0;
+}
+
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+                            uint32_t __user *wptr, struct mm_struct *mm)
+{
+       struct amdgpu_device *adev = get_amdgpu_device(kgd);
+       struct vi_sdma_mqd *m;
+       unsigned long end_jiffies;
+       uint32_t sdma_base_addr;
+       uint32_t data;
+
+       m = get_sdma_mqd(mqd);
+       sdma_base_addr = get_sdma_base_addr(m);
+       WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+               m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
+
+       end_jiffies = msecs_to_jiffies(2000) + jiffies;
+       while (true) {
+               data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+               if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+                       break;
+               if (time_after(jiffies, end_jiffies))
+                       return -ETIME;
+               usleep_range(500, 1000);
+       }
+       if (m->sdma_engine_id) {
+               data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL);
+               data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL,
+                               RESUME_CTX, 0);
+               WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data);
+       } else {
+               data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL);
+               data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
+                               RESUME_CTX, 0);
+               WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data);
+       }
+
+       data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
+                            ENABLE, 1);
+       WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
+       WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
+
+       if (read_user_wptr(mm, wptr, data))
+               WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data);
+       else
+               WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+                      m->sdmax_rlcx_rb_rptr);
+
+       WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
+                               m->sdmax_rlcx_virtual_addr);
+       WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+       WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
+                       m->sdmax_rlcx_rb_base_hi);
+       WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+                       m->sdmax_rlcx_rb_rptr_addr_lo);
+       WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+                       m->sdmax_rlcx_rb_rptr_addr_hi);
+
+       data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
+                            RB_ENABLE, 1);
+       WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
+
+       return 0;
+}
+
+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+                            uint32_t engine_id, uint32_t queue_id,
+                            uint32_t (**dump)[2], uint32_t *n_regs)
+{
+       struct amdgpu_device *adev = get_amdgpu_device(kgd);
+       uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET +
+               queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
+       uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (19+4+2+3+7)
+
+       *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+       if (*dump == NULL)
+               return -ENOMEM;
+
+       for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
+               DUMP_REG(sdma_offset + reg);
+       for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK;
+            reg++)
+               DUMP_REG(sdma_offset + reg);
+       for (reg = mmSDMA0_RLC0_CSA_ADDR_LO; reg <= mmSDMA0_RLC0_CSA_ADDR_HI;
+            reg++)
+               DUMP_REG(sdma_offset + reg);
+       for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; reg <= mmSDMA0_RLC0_DUMMY_REG;
+            reg++)
+               DUMP_REG(sdma_offset + reg);
+       for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; reg <= mmSDMA0_RLC0_MIDCMD_CNTL;
+            reg++)
+               DUMP_REG(sdma_offset + reg);
+
+       WARN_ON_ONCE(i != HQD_N_REGS);
+       *n_regs = i;
+
        return 0;
 }
 
@@ -388,7 +534,7 @@ static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
 static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
 {
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
-       struct cik_sdma_rlc_registers *m;
+       struct vi_sdma_mqd *m;
        uint32_t sdma_base_addr;
        uint32_t sdma_rlc_rb_cntl;
 
@@ -509,10 +655,10 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
                                unsigned int utimeout)
 {
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
-       struct cik_sdma_rlc_registers *m;
+       struct vi_sdma_mqd *m;
        uint32_t sdma_base_addr;
        uint32_t temp;
-       int timeout = utimeout;
+       unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
 
        m = get_sdma_mqd(mqd);
        sdma_base_addr = get_sdma_base_addr(m);
@@ -523,18 +669,19 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
 
        while (true) {
                temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
-               if (temp & SDMA0_STATUS_REG__RB_CMD_IDLE__SHIFT)
+               if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
                        break;
-               if (timeout <= 0)
+               if (time_after(jiffies, end_jiffies))
                        return -ETIME;
-               msleep(20);
-               timeout -= 20;
+               usleep_range(500, 1000);
        }
 
        WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
-       WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0);
-       WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0);
-       WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, 0);
+       WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+               RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
+               SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
+
+       m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
 
        return 0;
 }