drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/module.h>
#include <linux/fdtable.h>
#include <linux/uaccess.h>
#include <linux/mmu_context.h>

#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "gfx_v8_0.h"
#include "gca/gfx_8_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "oss/oss_3_0_sh_mask.h"
#include "oss/oss_3_0_d.h"
#include "gmc/gmc_8_1_sh_mask.h"
#include "gmc/gmc_8_1_d.h"
#include "vi_structs.h"
#include "vid.h"

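/*
 * Dequeue request types written to CP_HQD_DEQUEUE_REQUEST by
 * kgd_hqd_destroy() below: NO_ACTION leaves the queue alone, DRAIN_PIPE
 * lets outstanding waves finish before the queue is unmapped, and
 * RESET_WAVES kills them.
 */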
enum hqd_dequeue_request_type {
        NO_ACTION = 0,
        DRAIN_PIPE,
        RESET_WAVES
};

/*
 * Register access functions
 */

static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
                uint32_t sh_mem_config,
                uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
                uint32_t sh_mem_bases);
static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
                unsigned int vmid);
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
                        uint32_t queue_id, uint32_t __user *wptr,
                        uint32_t wptr_shift, uint32_t wptr_mask,
                        struct mm_struct *mm);
static int kgd_hqd_dump(struct kgd_dev *kgd,
                        uint32_t pipe_id, uint32_t queue_id,
                        uint32_t (**dump)[2], uint32_t *n_regs);
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
                             uint32_t __user *wptr, struct mm_struct *mm);
static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
                             uint32_t engine_id, uint32_t queue_id,
                             uint32_t (**dump)[2], uint32_t *n_regs);
static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
                uint32_t pipe_id, uint32_t queue_id);
static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
                                enum kfd_preempt_type reset_type,
                                unsigned int utimeout, uint32_t pipe_id,
                                uint32_t queue_id);
static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
                                unsigned int utimeout);
static int kgd_address_watch_disable(struct kgd_dev *kgd);
static int kgd_address_watch_execute(struct kgd_dev *kgd,
                                        unsigned int watch_point_id,
                                        uint32_t cntl_val,
                                        uint32_t addr_hi,
                                        uint32_t addr_lo);
static int kgd_wave_control_execute(struct kgd_dev *kgd,
                                        uint32_t gfx_index_val,
                                        uint32_t sq_cmd);
static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
                                        unsigned int watch_point_id,
                                        unsigned int reg_offset);

static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
                uint8_t vmid);
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
                uint8_t vmid);
static void set_scratch_backing_va(struct kgd_dev *kgd,
                                        uint64_t va, uint32_t vmid);
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
                uint64_t page_table_base);
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);

/* Because REG_GET_FIELD() is used here, this function lives in the
 * ASIC-specific file.
 */
static int get_tile_config(struct kgd_dev *kgd,
                struct tile_config *config)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

        config->gb_addr_config = adev->gfx.config.gb_addr_config;
        config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
                                MC_ARB_RAMCFG, NOOFBANK);
        config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
                                MC_ARB_RAMCFG, NOOFRANKS);

        config->tile_config_ptr = adev->gfx.config.tile_mode_array;
        config->num_tile_configs =
                        ARRAY_SIZE(adev->gfx.config.tile_mode_array);
        config->macro_tile_config_ptr =
                        adev->gfx.config.macrotile_mode_array;
        config->num_macro_tile_configs =
                        ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);

        return 0;
}

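/*
 * Dispatch table handed to the KFD driver: amdkfd reaches the GFX v8
 * hardware through these entry points rather than programming the
 * registers itself.
 */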
static const struct kfd2kgd_calls kfd2kgd = {
        .program_sh_mem_settings = kgd_program_sh_mem_settings,
        .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
        .init_interrupts = kgd_init_interrupts,
        .hqd_load = kgd_hqd_load,
        .hqd_sdma_load = kgd_hqd_sdma_load,
        .hqd_dump = kgd_hqd_dump,
        .hqd_sdma_dump = kgd_hqd_sdma_dump,
        .hqd_is_occupied = kgd_hqd_is_occupied,
        .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
        .hqd_destroy = kgd_hqd_destroy,
        .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
        .address_watch_disable = kgd_address_watch_disable,
        .address_watch_execute = kgd_address_watch_execute,
        .wave_control_execute = kgd_wave_control_execute,
        .address_watch_get_offset = kgd_address_watch_get_offset,
        .get_atc_vmid_pasid_mapping_pasid =
                        get_atc_vmid_pasid_mapping_pasid,
        .get_atc_vmid_pasid_mapping_valid =
                        get_atc_vmid_pasid_mapping_valid,
        .set_scratch_backing_va = set_scratch_backing_va,
        .get_tile_config = get_tile_config,
        .set_vm_context_page_table_base = set_vm_context_page_table_base,
        .invalidate_tlbs = invalidate_tlbs,
        .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
};

struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
{
        return (struct kfd2kgd_calls *)&kfd2kgd;
}
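/*
 * Illustrative only (not part of the original file): the KFD side is
 * expected to fetch this table once per device and then call through it,
 * roughly along the lines of:
 *
 *     struct kfd2kgd_calls *f = amdgpu_amdkfd_gfx_8_0_get_functions();
 *     f->set_pasid_vmid_mapping(kgd, pasid, vmid);
 *     f->hqd_load(kgd, mqd, pipe_id, queue_id, wptr, shift, mask, mm);
 *
 * The real call sites live in the amdkfd driver; the snippet above is a
 * sketch, not a verbatim quote.
 */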

static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
        return (struct amdgpu_device *)kgd;
}

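/*
 * SRBM indexing helpers: lock_srbm() takes srbm_mutex and programs
 * SRBM_GFX_CNTL so that subsequent CP_HQD_* and SH_MEM_* accesses are
 * steered to the selected MEC/pipe/queue/VMID instance; unlock_srbm()
 * writes 0 back and releases the mutex. acquire_queue()/release_queue()
 * wrap this for a specific compute queue.
 */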
static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
                        uint32_t queue, uint32_t vmid)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);

        mutex_lock(&adev->srbm_mutex);
        WREG32(mmSRBM_GFX_CNTL, value);
}

static void unlock_srbm(struct kgd_dev *kgd)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);

        WREG32(mmSRBM_GFX_CNTL, 0);
        mutex_unlock(&adev->srbm_mutex);
}

static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
                                uint32_t queue_id)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);

        uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
        uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

        lock_srbm(kgd, mec, pipe, queue_id, 0);
}

static void release_queue(struct kgd_dev *kgd)
{
        unlock_srbm(kgd);
}

static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
                                        uint32_t sh_mem_config,
                                        uint32_t sh_mem_ape1_base,
                                        uint32_t sh_mem_ape1_limit,
                                        uint32_t sh_mem_bases)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);

        lock_srbm(kgd, 0, 0, 0, vmid);

        WREG32(mmSH_MEM_CONFIG, sh_mem_config);
        WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base);
        WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
        WREG32(mmSH_MEM_BASES, sh_mem_bases);

        unlock_srbm(kgd);
}

static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
                                        unsigned int vmid)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);

        /*
         * We have to assume that there is no outstanding mapping.
         * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
         * a mapping is in progress or because a mapping finished
         * and the SW cleared it.
         * So the protocol is to always wait & clear.
         */
        uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
                        ATC_VMID0_PASID_MAPPING__VALID_MASK;

        WREG32(mmATC_VMID0_PASID_MAPPING + vmid, pasid_mapping);

        while (!(RREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS) & (1U << vmid)))
                cpu_relax();
        WREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);

        /* Mapping vmid to pasid also for IH block */
        WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping);

        return 0;
}

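/* Enable time-stamp and opcode-error interrupts on the compute pipe that
 * owns pipe_id. The SRBM index selects the MEC/pipe before CPC_INT_CNTL
 * is written.
 */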
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        uint32_t mec;
        uint32_t pipe;

        mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
        pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

        lock_srbm(kgd, mec, pipe, 0, 0);

        WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
                        CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);

        unlock_srbm(kgd);

        return 0;
}

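/*
 * SDMA RLC queue registers are laid out per engine and per queue; the
 * mmSDMA0_RLC0_* names below serve as a base that is offset by
 * engine * SDMA1_REGISTER_OFFSET + queue * KFD_VI_SDMA_QUEUE_OFFSET.
 */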
static inline uint32_t get_sdma_base_addr(struct vi_sdma_mqd *m)
{
        uint32_t retval;

        retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
                m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
        pr_debug("sdma base address: 0x%x\n", retval);

        return retval;
}

static inline struct vi_mqd *get_mqd(void *mqd)
{
        return (struct vi_mqd *)mqd;
}

static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd)
{
        return (struct vi_sdma_mqd *)mqd;
}

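/*
 * Load a compute queue: copy the MQD snapshot into the HQD registers of
 * the selected pipe/queue, restore the user write pointer, enable the
 * doorbell and finally set CP_HQD_ACTIVE.
 */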
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
                        uint32_t queue_id, uint32_t __user *wptr,
                        uint32_t wptr_shift, uint32_t wptr_mask,
                        struct mm_struct *mm)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        struct vi_mqd *m;
        uint32_t *mqd_hqd;
        uint32_t reg, wptr_val, data;
        bool valid_wptr = false;

        m = get_mqd(mqd);

        acquire_queue(kgd, pipe_id, queue_id);

        /* HIQ is set during driver init period with vmid set to 0*/
        if (m->cp_hqd_vmid == 0) {
                uint32_t value, mec, pipe;

                mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
                pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

                pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
                        mec, pipe, queue_id);
                value = RREG32(mmRLC_CP_SCHEDULERS);
                value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
                        ((mec << 5) | (pipe << 3) | queue_id | 0x80));
                WREG32(mmRLC_CP_SCHEDULERS, value);
        }

        /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
        mqd_hqd = &m->cp_mqd_base_addr_lo;

        for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_CONTROL; reg++)
                WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);

        /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
         * This is safe since EOP RPTR==WPTR for any inactive HQD
         * on ASICs that do not support context-save.
         * EOP writes/reads can start anywhere in the ring.
         */
        if (get_amdgpu_device(kgd)->asic_type != CHIP_TONGA) {
                WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr);
                WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr);
                WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem);
        }

        for (reg = mmCP_HQD_EOP_EVENTS; reg <= mmCP_HQD_ERROR; reg++)
                WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);

        /* Copy userspace write pointer value to register.
         * Activate doorbell logic to monitor subsequent changes.
         */
        data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
                             CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
        WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);

        /* read_user_wptr() may take the mm->mmap_sem.
         * Release srbm_mutex to avoid a circular lock dependency:
         * srbm_mutex -> mmap_sem -> reservation_ww_class_mutex -> srbm_mutex.
         */
        release_queue(kgd);
        valid_wptr = read_user_wptr(mm, wptr, wptr_val);
        acquire_queue(kgd, pipe_id, queue_id);
        if (valid_wptr)
                WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);

        data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
        WREG32(mmCP_HQD_ACTIVE, data);

        release_queue(kgd);

        return 0;
}

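/*
 * Snapshot the HQD registers of one queue for debug dumps. DUMP_REG()
 * stores (byte offset, value) pairs; the register index is shifted left
 * by 2 to convert a dword offset into a byte offset.
 */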
static int kgd_hqd_dump(struct kgd_dev *kgd,
                        uint32_t pipe_id, uint32_t queue_id,
                        uint32_t (**dump)[2], uint32_t *n_regs)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        uint32_t i = 0, reg;
#define HQD_N_REGS (54+4)
#define DUMP_REG(addr) do {                             \
                if (WARN_ON_ONCE(i >= HQD_N_REGS))      \
                        break;                          \
                (*dump)[i][0] = (addr) << 2;            \
                (*dump)[i++][1] = RREG32(addr);         \
        } while (0)

        *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
        if (*dump == NULL)
                return -ENOMEM;

        acquire_queue(kgd, pipe_id, queue_id);

        DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0);
        DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1);
        DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2);
        DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3);

        for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_DONES; reg++)
                DUMP_REG(reg);

        release_queue(kgd);

        WARN_ON_ONCE(i != HQD_N_REGS);
        *n_regs = i;

        return 0;
}

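/*
 * Load an SDMA RLC queue from its MQD: disable the ring buffer, wait for
 * the context to go idle, clear RESUME_CTX on the owning engine, restore
 * the doorbell/rptr/wptr/base registers and re-enable the ring buffer.
 * If the user write pointer cannot be read, wptr is set equal to rptr so
 * the engine simply sees no new work.
 */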
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
                             uint32_t __user *wptr, struct mm_struct *mm)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        struct vi_sdma_mqd *m;
        unsigned long end_jiffies;
        uint32_t sdma_base_addr;
        uint32_t data;

        m = get_sdma_mqd(mqd);
        sdma_base_addr = get_sdma_base_addr(m);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
                m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));

        end_jiffies = msecs_to_jiffies(2000) + jiffies;
        while (true) {
                data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
                if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
                        break;
                if (time_after(jiffies, end_jiffies))
                        return -ETIME;
                usleep_range(500, 1000);
        }
        if (m->sdma_engine_id) {
                data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL);
                data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL,
                                RESUME_CTX, 0);
                WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data);
        } else {
                data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL);
                data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
                                RESUME_CTX, 0);
                WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data);
        }

        data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
                             ENABLE, 1);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);

        if (read_user_wptr(mm, wptr, data))
                WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data);
        else
                WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
                       m->sdmax_rlcx_rb_rptr);

        WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
                                m->sdmax_rlcx_virtual_addr);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
                        m->sdmax_rlcx_rb_base_hi);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
                        m->sdmax_rlcx_rb_rptr_addr_lo);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
                        m->sdmax_rlcx_rb_rptr_addr_hi);

        data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
                             RB_ENABLE, 1);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);

        return 0;
}

static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
                             uint32_t engine_id, uint32_t queue_id,
                             uint32_t (**dump)[2], uint32_t *n_regs)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET +
                queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
        uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+4+2+3+7)

        *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
        if (*dump == NULL)
                return -ENOMEM;

        for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
                DUMP_REG(sdma_offset + reg);
        for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK;
             reg++)
                DUMP_REG(sdma_offset + reg);
        for (reg = mmSDMA0_RLC0_CSA_ADDR_LO; reg <= mmSDMA0_RLC0_CSA_ADDR_HI;
             reg++)
                DUMP_REG(sdma_offset + reg);
        for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; reg <= mmSDMA0_RLC0_DUMMY_REG;
             reg++)
                DUMP_REG(sdma_offset + reg);
        for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; reg <= mmSDMA0_RLC0_MIDCMD_CNTL;
             reg++)
                DUMP_REG(sdma_offset + reg);

        WARN_ON_ONCE(i != HQD_N_REGS);
        *n_regs = i;

        return 0;
}

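/*
 * A queue is considered occupied when its HQD is active and the PQ base
 * registers match queue_address; CP_HQD_PQ_BASE holds the ring address in
 * 256-byte units, hence the >> 8 before comparing.
 */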
static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
                                uint32_t pipe_id, uint32_t queue_id)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        uint32_t act;
        bool retval = false;
        uint32_t low, high;

        acquire_queue(kgd, pipe_id, queue_id);
        act = RREG32(mmCP_HQD_ACTIVE);
        if (act) {
                low = lower_32_bits(queue_address >> 8);
                high = upper_32_bits(queue_address >> 8);

                if (low == RREG32(mmCP_HQD_PQ_BASE) &&
                                high == RREG32(mmCP_HQD_PQ_BASE_HI))
                        retval = true;
        }
        release_queue(kgd);
        return retval;
}

static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        struct vi_sdma_mqd *m;
        uint32_t sdma_base_addr;
        uint32_t sdma_rlc_rb_cntl;

        m = get_sdma_mqd(mqd);
        sdma_base_addr = get_sdma_base_addr(m);

        sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);

        if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
                return true;

        return false;
}

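/*
 * Preempt (destroy) a compute queue: detach the HIQ scheduler entry if
 * needed, wait until it is safe to issue a dequeue request (see the IQ
 * timer workaround below), then write the request type and poll
 * CP_HQD_ACTIVE until the queue goes inactive or the timeout expires.
 */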
static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
                                enum kfd_preempt_type reset_type,
                                unsigned int utimeout, uint32_t pipe_id,
                                uint32_t queue_id)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        uint32_t temp;
        enum hqd_dequeue_request_type type;
        unsigned long flags, end_jiffies;
        int retry;
        struct vi_mqd *m = get_mqd(mqd);

        if (adev->in_gpu_reset)
                return -EIO;

        acquire_queue(kgd, pipe_id, queue_id);

        if (m->cp_hqd_vmid == 0)
                WREG32_FIELD(RLC_CP_SCHEDULERS, scheduler1, 0);

        switch (reset_type) {
        case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
                type = DRAIN_PIPE;
                break;
        case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
                type = RESET_WAVES;
                break;
        default:
                type = DRAIN_PIPE;
                break;
        }

        /* Workaround: If IQ timer is active and the wait time is close to or
         * equal to 0, dequeueing is not safe. Wait until either the wait time
         * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is
         * cleared before continuing. Also, ensure wait times are set to at
         * least 0x3.
         */
        local_irq_save(flags);
        preempt_disable();
        retry = 5000; /* wait for 500 usecs at maximum */
        while (true) {
                temp = RREG32(mmCP_HQD_IQ_TIMER);
                if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
                        pr_debug("HW is processing IQ\n");
                        goto loop;
                }
                if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
                        if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
                                        == 3) /* SEM-rearm is safe */
                                break;
                        /* Wait time 3 is safe for CP, but our MMIO read/write
                         * time is close to 1 microsecond, so check for 10 to
                         * leave more buffer room
                         */
                        if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
                                        >= 10)
                                break;
                        pr_debug("IQ timer is active\n");
                } else
                        break;
loop:
                if (!retry) {
                        pr_err("CP HQD IQ timer status time out\n");
                        break;
                }
                ndelay(100);
                --retry;
        }
        retry = 1000;
        while (true) {
                temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
                if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
                        break;
                pr_debug("Dequeue request is pending\n");

                if (!retry) {
                        pr_err("CP HQD dequeue request time out\n");
                        break;
                }
                ndelay(100);
                --retry;
        }
        local_irq_restore(flags);
        preempt_enable();

        WREG32(mmCP_HQD_DEQUEUE_REQUEST, type);

        end_jiffies = (utimeout * HZ / 1000) + jiffies;
        while (true) {
                temp = RREG32(mmCP_HQD_ACTIVE);
                if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
                        break;
                if (time_after(jiffies, end_jiffies)) {
                        pr_err("cp queue preemption time out.\n");
                        release_queue(kgd);
                        return -ETIME;
                }
                usleep_range(500, 1000);
        }

        release_queue(kgd);
        return 0;
}

static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
                                unsigned int utimeout)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        struct vi_sdma_mqd *m;
        uint32_t sdma_base_addr;
        uint32_t temp;
        unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;

        m = get_sdma_mqd(mqd);
        sdma_base_addr = get_sdma_base_addr(m);

        temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
        temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);

        while (true) {
                temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
                if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
                        break;
                if (time_after(jiffies, end_jiffies))
                        return -ETIME;
                usleep_range(500, 1000);
        }

        WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
        WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
                RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
                SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);

        m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);

        return 0;
}

static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
                                                        uint8_t vmid)
{
        uint32_t reg;
        struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

        reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
        return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
}

static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
                                                                uint8_t vmid)
{
        uint32_t reg;
        struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

        reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
        return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
}

static int kgd_address_watch_disable(struct kgd_dev *kgd)
{
        return 0;
}

static int kgd_address_watch_execute(struct kgd_dev *kgd,
                                        unsigned int watch_point_id,
                                        uint32_t cntl_val,
                                        uint32_t addr_hi,
                                        uint32_t addr_lo)
{
        return 0;
}

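/*
 * Issue an SQ_CMD (e.g. halt or kill waves) to the shader engines selected
 * by gfx_index_val, then restore GRBM_GFX_INDEX to broadcast mode so later
 * register writes reach all SEs/SHs/instances again.
 */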
static int kgd_wave_control_execute(struct kgd_dev *kgd,
                                        uint32_t gfx_index_val,
                                        uint32_t sq_cmd)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        uint32_t data = 0;

        mutex_lock(&adev->grbm_idx_mutex);

        WREG32(mmGRBM_GFX_INDEX, gfx_index_val);
        WREG32(mmSQ_CMD, sq_cmd);

        data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
                INSTANCE_BROADCAST_WRITES, 1);
        data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
                SH_BROADCAST_WRITES, 1);
        data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
                SE_BROADCAST_WRITES, 1);

        WREG32(mmGRBM_GFX_INDEX, data);
        mutex_unlock(&adev->grbm_idx_mutex);

        return 0;
}

static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
                                        unsigned int watch_point_id,
                                        unsigned int reg_offset)
{
        return 0;
}

static void set_scratch_backing_va(struct kgd_dev *kgd,
                                        uint64_t va, uint32_t vmid)
{
        struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

        lock_srbm(kgd, 0, 0, 0, vmid);
        WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
        unlock_srbm(kgd);
}

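/*
 * On GFX v8 the KFD-owned VMIDs (by default 8-15) have contiguous
 * page-table base registers starting at VM_CONTEXT8_PAGE_TABLE_BASE_ADDR,
 * hence the "+ vmid - 8" below.
 */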
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
                uint64_t page_table_base)
{
        struct amdgpu_device *adev = get_amdgpu_device(kgd);

        if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
                pr_err("trying to set page table base for wrong VMID\n");
                return;
        }
        WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8,
                        lower_32_bits(page_table_base));
}

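/*
 * Flush the GPU TLB for the VMID currently mapped to the given pasid: walk
 * the KFD VMIDs, match the ATC PASID mapping, then kick
 * VM_INVALIDATE_REQUEST for that VMID. The subsequent read of
 * VM_INVALIDATE_RESPONSE fetches the acknowledgement register; its value
 * is not checked here.
 */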
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
{
        struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
        int vmid;
        unsigned int tmp;

        if (adev->in_gpu_reset)
                return -EIO;

        for (vmid = 0; vmid < 16; vmid++) {
                if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
                        continue;

                tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
                if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
                        (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
                        WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
                        RREG32(mmVM_INVALIDATE_RESPONSE);
                        break;
                }
        }

        return 0;
}

static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
{
        struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

        if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
                pr_err("non kfd vmid %d\n", vmid);
                return -EINVAL;
        }

        WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
        RREG32(mmVM_INVALIDATE_RESPONSE);
        return 0;
}