1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/firmware.h>
25 #include <drm/drmP.h>
26 #include "amdgpu.h"
27 #include "amdgpu_ucode.h"
28 #include "amdgpu_trace.h"
29
30 #include "sdma0/sdma0_4_0_offset.h"
31 #include "sdma0/sdma0_4_0_sh_mask.h"
32 #include "sdma1/sdma1_4_0_offset.h"
33 #include "sdma1/sdma1_4_0_sh_mask.h"
34 #include "mmhub/mmhub_1_0_offset.h"
35 #include "mmhub/mmhub_1_0_sh_mask.h"
36 #include "hdp/hdp_4_0_offset.h"
37 #include "sdma0/sdma0_4_1_default.h"
38
39 #include "soc15_common.h"
40 #include "soc15.h"
41 #include "vega10_sdma_pkt_open.h"
42
43 MODULE_FIRMWARE("amdgpu/vega10_sdma.bin");
44 MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin");
45 MODULE_FIRMWARE("amdgpu/raven_sdma.bin");
46
47 #define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK  0x000000F8L
48 #define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L
49
50 static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev);
51 static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev);
52 static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev);
53 static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev);
54
55 static const struct soc15_reg_golden golden_settings_sdma_4[] = {
56         SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
57         SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xff000ff0, 0x3f000100),
58         SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_IB_CNTL, 0x800f0100, 0x00000100),
59         SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
60         SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_IB_CNTL, 0x800f0100, 0x00000100),
61         SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
62         SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_POWER_CNTL, 0x003ff006, 0x0003c000),
63         SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_IB_CNTL, 0x800f0100, 0x00000100),
64         SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
65         SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL, 0x800f0100, 0x00000100),
66         SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
67         SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
68         SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
69         SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CLK_CTRL, 0xffffffff, 0x3f000100),
70         SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_IB_CNTL, 0x800f0100, 0x00000100),
71         SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
72         SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_IB_CNTL, 0x800f0100, 0x00000100),
73         SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
74         SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_POWER_CNTL, 0x003ff000, 0x0003c000),
75         SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_IB_CNTL, 0x800f0100, 0x00000100),
76         SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
77         SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_IB_CNTL, 0x800f0100, 0x00000100),
78         SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
79         SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0)
80 };
81
82 static const struct soc15_reg_golden golden_settings_sdma_vg10[] = {
83         SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00104002),
84         SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002),
85         SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0018773f, 0x00104002),
86         SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002)
87 };
88
89 static const struct soc15_reg_golden golden_settings_sdma_4_1[] =
90 {
91         SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
92         SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100),
93         SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_IB_CNTL, 0x800f0111, 0x00000100),
94         SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
95         SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_POWER_CNTL, 0xfc3fffff, 0x40000051),
96         SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_IB_CNTL, 0x800f0111, 0x00000100),
97         SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
98         SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100),
99         SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
100         SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0)
101 };
102
103 static const struct soc15_reg_golden golden_settings_sdma_rv1[] =
104 {
105         SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00000002),
106         SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00000002)
107 };
108
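/**
 * sdma_v4_0_get_reg_offset - get the absolute register offset for an instance
 *
 * @adev: amdgpu_device pointer
 * @instance: SDMA engine instance (0 or 1)
 * @offset: register offset relative to the instance base
 *
 * Returns the register offset of @offset for the requested SDMA instance.
 */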
109 static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev,
110                 u32 instance, u32 offset)
111 {
112         return (instance == 0 ? (adev->reg_offset[SDMA0_HWIP][0][0] + offset) :
113                         (adev->reg_offset[SDMA1_HWIP][0][0] + offset));
114 }
115
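/**
 * sdma_v4_0_init_golden_registers - program golden register settings
 *
 * @adev: amdgpu_device pointer
 *
 * Program the ASIC specific golden register sequences for the SDMA
 * engines (VEGA10/RAVEN).
 */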
116 static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
117 {
118         switch (adev->asic_type) {
119         case CHIP_VEGA10:
120                 soc15_program_register_sequence(adev,
121                                                  golden_settings_sdma_4,
122                                                  ARRAY_SIZE(golden_settings_sdma_4));
123                 soc15_program_register_sequence(adev,
124                                                  golden_settings_sdma_vg10,
125                                                  ARRAY_SIZE(golden_settings_sdma_vg10));
126                 break;
127         case CHIP_RAVEN:
128                 soc15_program_register_sequence(adev,
129                                                  golden_settings_sdma_4_1,
130                                                  ARRAY_SIZE(golden_settings_sdma_4_1));
131                 soc15_program_register_sequence(adev,
132                                                  golden_settings_sdma_rv1,
133                                                  ARRAY_SIZE(golden_settings_sdma_rv1));
134                 break;
135         default:
136                 break;
137         }
138 }
139
140 /**
141  * sdma_v4_0_init_microcode - load ucode images from disk
142  *
143  * @adev: amdgpu_device pointer
144  *
145  * Use the firmware interface to load the ucode images into
146  * the driver (not loaded into hw).
147  * Returns 0 on success, error on failure.
148  */
149
150 /* emulation only, won't work on real chips */
151 /* Vega10 real chips need to use PSP to load the firmware */
152 static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
153 {
154         const char *chip_name;
155         char fw_name[30];
156         int err = 0, i;
157         struct amdgpu_firmware_info *info = NULL;
158         const struct common_firmware_header *header = NULL;
159         const struct sdma_firmware_header_v1_0 *hdr;
160
161         DRM_DEBUG("\n");
162
163         switch (adev->asic_type) {
164         case CHIP_VEGA10:
165                 chip_name = "vega10";
166                 break;
167         case CHIP_RAVEN:
168                 chip_name = "raven";
169                 break;
170         default:
171                 BUG();
172         }
173
174         for (i = 0; i < adev->sdma.num_instances; i++) {
175                 if (i == 0)
176                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
177                 else
178                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);
179                 err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
180                 if (err)
181                         goto out;
182                 err = amdgpu_ucode_validate(adev->sdma.instance[i].fw);
183                 if (err)
184                         goto out;
185                 hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
186                 adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
187                 adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
188                 if (adev->sdma.instance[i].feature_version >= 20)
189                         adev->sdma.instance[i].burst_nop = true;
190                 DRM_DEBUG("psp_load == '%s'\n",
191                                 adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");
192
193                 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
194                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
195                         info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
196                         info->fw = adev->sdma.instance[i].fw;
197                         header = (const struct common_firmware_header *)info->fw->data;
198                         adev->firmware.fw_size +=
199                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
200                 }
201         }
202 out:
203         if (err) {
204                 DRM_ERROR("sdma_v4_0: Failed to load firmware \"%s\"\n", fw_name);
205                 for (i = 0; i < adev->sdma.num_instances; i++) {
206                         release_firmware(adev->sdma.instance[i].fw);
207                         adev->sdma.instance[i].fw = NULL;
208                 }
209         }
210         return err;
211 }
212
213 /**
214  * sdma_v4_0_ring_get_rptr - get the current read pointer
215  *
216  * @ring: amdgpu ring pointer
217  *
218  * Get the current rptr from the hardware (VEGA10+).
219  */
220 static uint64_t sdma_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
221 {
222         u64 *rptr;
223
224         /* XXX check if swapping is necessary on BE */
225         rptr = ((u64 *)&ring->adev->wb.wb[ring->rptr_offs]);
226
227         DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
228         return ((*rptr) >> 2);
229 }
230
231 /**
232  * sdma_v4_0_ring_get_wptr - get the current write pointer
233  *
234  * @ring: amdgpu ring pointer
235  *
236  * Get the current wptr from the hardware (VEGA10+).
237  */
238 static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
239 {
240         struct amdgpu_device *adev = ring->adev;
241         u64 *wptr = NULL;
242         uint64_t local_wptr = 0;
243
244         if (ring->use_doorbell) {
245                 /* XXX check if swapping is necessary on BE */
246                 wptr = ((u64 *)&adev->wb.wb[ring->wptr_offs]);
247                 DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", *wptr);
248                 *wptr = (*wptr) >> 2;
249                 DRM_DEBUG("wptr/doorbell after shift == 0x%016llx\n", *wptr);
250         } else {
251                 u32 lowbit, highbit;
252                 int me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
253
254                 wptr = &local_wptr;
255                 lowbit = RREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR)) >> 2;
256                 highbit = RREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2;
257
258                 DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n",
259                                 me, highbit, lowbit);
260                 *wptr = highbit;
261                 *wptr = (*wptr) << 32;
262                 *wptr |= lowbit;
263         }
264
265         return *wptr;
266 }
267
268 /**
269  * sdma_v4_0_ring_set_wptr - commit the write pointer
270  *
271  * @ring: amdgpu ring pointer
272  *
273  * Write the wptr back to the hardware (VEGA10+).
274  */
275 static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
276 {
277         struct amdgpu_device *adev = ring->adev;
278
279         DRM_DEBUG("Setting write pointer\n");
280         if (ring->use_doorbell) {
281                 u64 *wb = (u64 *)&adev->wb.wb[ring->wptr_offs];
282
283                 DRM_DEBUG("Using doorbell -- "
284                                 "wptr_offs == 0x%08x "
285                                 "lower_32_bits(ring->wptr << 2) == 0x%08x "
286                                 "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
287                                 ring->wptr_offs,
288                                 lower_32_bits(ring->wptr << 2),
289                                 upper_32_bits(ring->wptr << 2));
290                 /* XXX check if swapping is necessary on BE */
291                 WRITE_ONCE(*wb, (ring->wptr << 2));
292                 DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
293                                 ring->doorbell_index, ring->wptr << 2);
294                 WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
295         } else {
296                 int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1;
297
298                 DRM_DEBUG("Not using doorbell -- "
299                                 "mmSDMA%i_GFX_RB_WPTR == 0x%08x "
300                                 "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
301                                 me,
302                                 lower_32_bits(ring->wptr << 2),
303                                 me,
304                                 upper_32_bits(ring->wptr << 2));
305                 WREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
306                 WREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
307         }
308 }
309
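/**
 * sdma_v4_0_ring_insert_nop - insert NOP packets on the ring
 *
 * @ring: amdgpu ring pointer
 * @count: number of NOP dwords to insert
 *
 * Insert @count NOP dwords; when the firmware supports burst NOPs the
 * first dword is a burst NOP header covering the remaining ones (VEGA10).
 */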
310 static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
311 {
312         struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
313         int i;
314
315         for (i = 0; i < count; i++)
316                 if (sdma && sdma->burst_nop && (i == 0))
317                         amdgpu_ring_write(ring, ring->funcs->nop |
318                                 SDMA_PKT_NOP_HEADER_COUNT(count - 1));
319                 else
320                         amdgpu_ring_write(ring, ring->funcs->nop);
321 }
322
323 /**
324  * sdma_v4_0_ring_emit_ib - Schedule an IB on the DMA engine
325  *
326  * @ring: amdgpu ring pointer
327  * @ib: IB object to schedule
 * @vmid: VMID the IB is associated with
 * @ctx_switch: whether a context switch is needed (not used by SDMA)
328  *
329  * Schedule an IB in the DMA ring (VEGA10).
330  */
331 static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
332                                         struct amdgpu_ib *ib,
333                                         unsigned vmid, bool ctx_switch)
334 {
335         /* IB packet must end on an 8 DW boundary */
336         sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
337
338         amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
339                           SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
340         /* base must be 32 byte aligned */
341         amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
342         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
343         amdgpu_ring_write(ring, ib->length_dw);
344         amdgpu_ring_write(ring, 0);
345         amdgpu_ring_write(ring, 0);
346
347 }
348
349 /**
350  * sdma_v4_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
351  *
352  * @ring: amdgpu ring pointer
353  *
354  * Emit an hdp flush packet on the requested DMA ring.
355  */
356 static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
357 {
358         struct amdgpu_device *adev = ring->adev;
359         u32 ref_and_mask = 0;
360         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
361
362         if (ring == &ring->adev->sdma.instance[0].ring)
363                 ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0;
364         else
365                 ref_and_mask = nbio_hf_reg->ref_and_mask_sdma1;
366
367         amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
368                           SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
369                           SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
370         amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_done_offset(adev)) << 2);
371         amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_req_offset(adev)) << 2);
372         amdgpu_ring_write(ring, ref_and_mask); /* reference */
373         amdgpu_ring_write(ring, ref_and_mask); /* mask */
374         amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
375                           SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
376 }
377
378 /**
379  * sdma_v4_0_ring_emit_fence - emit a fence on the DMA ring
380  *
381  * @ring: amdgpu ring pointer
382  * @addr: GPU address where the fence value is written
 * @seq: fence sequence number
 * @flags: fence flags (e.g. AMDGPU_FENCE_FLAG_64BIT)
383  *
384  * Add a DMA fence packet to the ring to write
385  * the fence seq number and DMA trap packet to generate
386  * an interrupt if needed (VEGA10).
387  */
388 static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
389                                       unsigned flags)
390 {
391         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
392         /* write the fence */
393         amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
394         /* zero in first two bits */
395         BUG_ON(addr & 0x3);
396         amdgpu_ring_write(ring, lower_32_bits(addr));
397         amdgpu_ring_write(ring, upper_32_bits(addr));
398         amdgpu_ring_write(ring, lower_32_bits(seq));
399
400         /* optionally write high bits as well */
401         if (write64bit) {
402                 addr += 4;
403                 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
404                 /* zero in first two bits */
405                 BUG_ON(addr & 0x3);
406                 amdgpu_ring_write(ring, lower_32_bits(addr));
407                 amdgpu_ring_write(ring, upper_32_bits(addr));
408                 amdgpu_ring_write(ring, upper_32_bits(seq));
409         }
410
411         /* generate an interrupt */
412         amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
413         amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
414 }
415
416
417 /**
418  * sdma_v4_0_gfx_stop - stop the gfx async dma engines
419  *
420  * @adev: amdgpu_device pointer
421  *
422  * Stop the gfx async dma ring buffers (VEGA10).
423  */
424 static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev)
425 {
426         struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring;
427         struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring;
428         u32 rb_cntl, ib_cntl;
429         int i;
430
431         if ((adev->mman.buffer_funcs_ring == sdma0) ||
432             (adev->mman.buffer_funcs_ring == sdma1))
433                 amdgpu_ttm_set_active_vram_size(adev, adev->gmc.visible_vram_size);
434
435         for (i = 0; i < adev->sdma.num_instances; i++) {
436                 rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
437                 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
438                 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
439                 ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
440                 ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
441                 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
442         }
443
444         sdma0->ready = false;
445         sdma1->ready = false;
446 }
447
448 /**
449  * sdma_v4_0_rlc_stop - stop the compute async dma engines
450  *
451  * @adev: amdgpu_device pointer
452  *
453  * Stop the compute async dma queues (VEGA10).
454  */
455 static void sdma_v4_0_rlc_stop(struct amdgpu_device *adev)
456 {
457         /* XXX todo */
458 }
459
460 /**
461  * sdma_v4_0_ctx_switch_enable - enable/disable the async dma engines context switch
462  *
463  * @adev: amdgpu_device pointer
464  * @enable: enable/disable the DMA MEs context switch.
465  *
466  * Halt or unhalt the async dma engines context switch (VEGA10).
467  */
468 static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
469 {
470         u32 f32_cntl, phase_quantum = 0;
471         int i;
472
473         if (amdgpu_sdma_phase_quantum) {
474                 unsigned value = amdgpu_sdma_phase_quantum;
475                 unsigned unit = 0;
476
477                 while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
478                                 SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) {
479                         value = (value + 1) >> 1;
480                         unit++;
481                 }
482                 if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
483                             SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) {
484                         value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
485                                  SDMA0_PHASE0_QUANTUM__VALUE__SHIFT);
486                         unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
487                                 SDMA0_PHASE0_QUANTUM__UNIT__SHIFT);
488                         WARN_ONCE(1,
489                         "clamping sdma_phase_quantum to %uK clock cycles\n",
490                                   value << unit);
491                 }
492                 phase_quantum =
493                         value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
494                         unit  << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
495         }
496
497         for (i = 0; i < adev->sdma.num_instances; i++) {
498                 f32_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
499                 f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
500                                 AUTO_CTXSW_ENABLE, enable ? 1 : 0);
501                 if (enable && amdgpu_sdma_phase_quantum) {
502                         WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM),
503                                phase_quantum);
504                         WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM),
505                                phase_quantum);
506                         WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM),
507                                phase_quantum);
508                 }
509                 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl);
510         }
511
512 }
513
514 /**
515  * sdma_v4_0_enable - enable/disable the async dma engines
516  *
517  * @adev: amdgpu_device pointer
518  * @enable: enable/disable the DMA MEs.
519  *
520  * Halt or unhalt the async dma engines (VEGA10).
521  */
522 static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable)
523 {
524         u32 f32_cntl;
525         int i;
526
527         if (!enable) {
528                 sdma_v4_0_gfx_stop(adev);
529                 sdma_v4_0_rlc_stop(adev);
530         }
531
532         for (i = 0; i < adev->sdma.num_instances; i++) {
533                 f32_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
534                 f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1);
535                 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
536         }
537 }
538
539 /**
540  * sdma_v4_0_gfx_resume - setup and start the async dma engines
541  *
542  * @adev: amdgpu_device pointer
543  *
544  * Set up the gfx DMA ring buffers and enable them (VEGA10).
545  * Returns 0 for success, error for failure.
546  */
547 static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
548 {
549         struct amdgpu_ring *ring;
550         u32 rb_cntl, ib_cntl, wptr_poll_cntl;
551         u32 rb_bufsz;
552         u32 wb_offset;
553         u32 doorbell;
554         u32 doorbell_offset;
555         u32 temp;
556         u64 wptr_gpu_addr;
557         int i, r;
558
559         for (i = 0; i < adev->sdma.num_instances; i++) {
560                 ring = &adev->sdma.instance[i].ring;
561                 wb_offset = (ring->rptr_offs * 4);
562
563                 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
564
565                 /* Set ring buffer size in dwords */
566                 rb_bufsz = order_base_2(ring->ring_size / 4);
567                 rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
568                 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
569 #ifdef __BIG_ENDIAN
570                 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
571                 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
572                                         RPTR_WRITEBACK_SWAP_ENABLE, 1);
573 #endif
574                 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
575
576                 /* Initialize the ring buffer's read and write pointers */
577                 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
578                 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
579                 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
580                 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
581
582                 /* set the wb address whether it's enabled or not */
583                 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
584                        upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
585                 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
586                        lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
587
588                 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
589
590                 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
591                 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
592
593                 ring->wptr = 0;
594
595                 /* before programming wptr to a smaller value, minor_ptr_update must be set first */
596                 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
597
598                 if (!amdgpu_sriov_vf(adev)) { /* only bare-metal uses register writes for wptr */
599                         WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
600                         WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
601                 }
602
603                 doorbell = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
604                 doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
605
606                 if (ring->use_doorbell) {
607                         doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
608                         doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
609                                         OFFSET, ring->doorbell_index);
610                 } else {
611                         doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
612                 }
613                 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
614                 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
615                 adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
616                                                       ring->doorbell_index);
617
618                 if (amdgpu_sriov_vf(adev))
619                         sdma_v4_0_ring_set_wptr(ring);
620
621                 /* set minor_ptr_update to 0 after wptr is programmed */
622                 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
623
624                 /* set utc l1 enable flag always to 1 */
625                 temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
626                 temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
627                 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
628
629                 if (!amdgpu_sriov_vf(adev)) {
630                         /* unhalt engine */
631                         temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
632                         temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
633                         WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
634                 }
635
636                 /* setup the wptr shadow polling */
637                 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
638                 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
639                        lower_32_bits(wptr_gpu_addr));
640                 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
641                        upper_32_bits(wptr_gpu_addr));
642                 wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
643                 if (amdgpu_sriov_vf(adev))
644                         wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1);
645                 else
646                         wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0);
647                 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl);
648
649                 /* enable DMA RB */
650                 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
651                 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
652
653                 ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
654                 ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
655 #ifdef __BIG_ENDIAN
656                 ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
657 #endif
658                 /* enable DMA IBs */
659                 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
660
661                 ring->ready = true;
662
663                 if (amdgpu_sriov_vf(adev)) { /* the bare-metal sequence doesn't need the two lines below */
664                         sdma_v4_0_ctx_switch_enable(adev, true);
665                         sdma_v4_0_enable(adev, true);
666                 }
667
668                 r = amdgpu_ring_test_ring(ring);
669                 if (r) {
670                         ring->ready = false;
671                         return r;
672                 }
673
674                 if (adev->mman.buffer_funcs_ring == ring)
675                         amdgpu_ttm_set_active_vram_size(adev, adev->gmc.real_vram_size);
676
677         }
678
679         return 0;
680 }
681
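/**
 * sdma_v4_1_update_power_gating - toggle SDMA power gating
 *
 * @adev: amdgpu_device pointer
 * @enable: enable/disable power gating
 *
 * Enable or disable the context-empty idle interrupt used for
 * SDMA power gating (RAVEN).
 */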
682 static void
683 sdma_v4_1_update_power_gating(struct amdgpu_device *adev, bool enable)
684 {
685         uint32_t def, data;
686
687         if (enable && (adev->pg_flags & AMD_PG_SUPPORT_SDMA)) {
688                 /* enable idle interrupt */
689                 def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL));
690                 data |= SDMA0_CNTL__CTXEMPTY_INT_ENABLE_MASK;
691
692                 if (data != def)
693                         WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL), data);
694         } else {
695                 /* disable idle interrupt */
696                 def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL));
697                 data &= ~SDMA0_CNTL__CTXEMPTY_INT_ENABLE_MASK;
698                 if (data != def)
699                         WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL), data);
700         }
701 }
702
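/**
 * sdma_v4_1_init_power_gating - initialize SDMA power gating
 *
 * @adev: amdgpu_device pointer
 *
 * Enable HW based power gating and program the power on/off hold
 * and hysteresis times with their default values (RAVEN).
 */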
703 static void sdma_v4_1_init_power_gating(struct amdgpu_device *adev)
704 {
705         uint32_t def, data;
706
707         /* Enable HW based PG. */
708         def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL));
709         data |= SDMA0_POWER_CNTL__PG_CNTL_ENABLE_MASK;
710         if (data != def)
711                 WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data);
712
713         /* enable interrupt */
714         def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL));
715         data |= SDMA0_CNTL__CTXEMPTY_INT_ENABLE_MASK;
716         if (data != def)
717                 WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL), data);
718
719         /* Configure hold time to filter out invalid power on/off requests. Use the default for now */
720         def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL));
721         data &= ~SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK;
722         data |= (mmSDMA0_POWER_CNTL_DEFAULT & SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK);
723         /* Configure switch time for hysteresis purposes. Use the default for now */
724         data &= ~SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK;
725         data |= (mmSDMA0_POWER_CNTL_DEFAULT & SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK);
726         if (data != def)
727                 WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data);
728 }
729
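/**
 * sdma_v4_0_init_pg - initialize power gating if supported
 *
 * @adev: amdgpu_device pointer
 *
 * Set up SDMA power gating on ASICs that support it (RAVEN).
 */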
730 static void sdma_v4_0_init_pg(struct amdgpu_device *adev)
731 {
732         if (!(adev->pg_flags & AMD_PG_SUPPORT_SDMA))
733                 return;
734
735         switch (adev->asic_type) {
736         case CHIP_RAVEN:
737                 sdma_v4_1_init_power_gating(adev);
738                 sdma_v4_1_update_power_gating(adev, true);
739                 break;
740         default:
741                 break;
742         }
743 }
744
745 /**
746  * sdma_v4_0_rlc_resume - setup and start the async dma engines
747  *
748  * @adev: amdgpu_device pointer
749  *
750  * Set up the compute DMA queues and enable them (VEGA10).
751  * Returns 0 for success, error for failure.
752  */
753 static int sdma_v4_0_rlc_resume(struct amdgpu_device *adev)
754 {
755         sdma_v4_0_init_pg(adev);
756
757         return 0;
758 }
759
760 /**
761  * sdma_v4_0_load_microcode - load the sDMA ME ucode
762  *
763  * @adev: amdgpu_device pointer
764  *
765  * Loads the sDMA0/1 ucode.
766  * Returns 0 for success, -EINVAL if the ucode is not available.
767  */
768 static int sdma_v4_0_load_microcode(struct amdgpu_device *adev)
769 {
770         const struct sdma_firmware_header_v1_0 *hdr;
771         const __le32 *fw_data;
772         u32 fw_size;
773         int i, j;
774
775         /* halt the MEs */
776         sdma_v4_0_enable(adev, false);
777
778         for (i = 0; i < adev->sdma.num_instances; i++) {
779                 if (!adev->sdma.instance[i].fw)
780                         return -EINVAL;
781
782                 hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
783                 amdgpu_ucode_print_sdma_hdr(&hdr->header);
784                 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
785
786                 fw_data = (const __le32 *)
787                         (adev->sdma.instance[i].fw->data +
788                                 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
789
790                 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), 0);
791
792                 for (j = 0; j < fw_size; j++)
793                         WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++));
794
795                 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version);
796         }
797
798         return 0;
799 }
800
801 /**
802  * sdma_v4_0_start - setup and start the async dma engines
803  *
804  * @adev: amdgpu_device pointer
805  *
806  * Set up the DMA engines and enable them (VEGA10).
807  * Returns 0 for success, error for failure.
808  */
809 static int sdma_v4_0_start(struct amdgpu_device *adev)
810 {
811         int r = 0;
812
813         if (amdgpu_sriov_vf(adev)) {
814                 sdma_v4_0_ctx_switch_enable(adev, false);
815                 sdma_v4_0_enable(adev, false);
816
817                 /* set RB registers */
818                 r = sdma_v4_0_gfx_resume(adev);
819                 return r;
820         }
821
822         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
823                 r = sdma_v4_0_load_microcode(adev);
824                 if (r)
825                         return r;
826         }
827
828         /* unhalt the MEs */
829         sdma_v4_0_enable(adev, true);
830         /* enable sdma ring preemption */
831         sdma_v4_0_ctx_switch_enable(adev, true);
832
833         /* start the gfx rings and rlc compute queues */
834         r = sdma_v4_0_gfx_resume(adev);
835         if (r)
836                 return r;
837         r = sdma_v4_0_rlc_resume(adev);
838
839         return r;
840 }
841
842 /**
843  * sdma_v4_0_ring_test_ring - simple async dma engine test
844  *
845  * @ring: amdgpu_ring structure holding ring information
846  *
847  * Test the DMA engine by using it to write a value
848  * to memory (VEGA10).
849  * Returns 0 for success, error for failure.
850  */
851 static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring)
852 {
853         struct amdgpu_device *adev = ring->adev;
854         unsigned i;
855         unsigned index;
856         int r;
857         u32 tmp;
858         u64 gpu_addr;
859
860         r = amdgpu_device_wb_get(adev, &index);
861         if (r) {
862                 dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
863                 return r;
864         }
865
866         gpu_addr = adev->wb.gpu_addr + (index * 4);
867         tmp = 0xCAFEDEAD;
868         adev->wb.wb[index] = cpu_to_le32(tmp);
869
870         r = amdgpu_ring_alloc(ring, 5);
871         if (r) {
872                 DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
873                 amdgpu_device_wb_free(adev, index);
874                 return r;
875         }
876
877         amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
878                           SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
879         amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
880         amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
881         amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0));
882         amdgpu_ring_write(ring, 0xDEADBEEF);
883         amdgpu_ring_commit(ring);
884
885         for (i = 0; i < adev->usec_timeout; i++) {
886                 tmp = le32_to_cpu(adev->wb.wb[index]);
887                 if (tmp == 0xDEADBEEF)
888                         break;
889                 DRM_UDELAY(1);
890         }
891
892         if (i < adev->usec_timeout) {
893                 DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
894         } else {
895                 DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
896                           ring->idx, tmp);
897                 r = -EINVAL;
898         }
899         amdgpu_device_wb_free(adev, index);
900
901         return r;
902 }
903
904 /**
905  * sdma_v4_0_ring_test_ib - test an IB on the DMA engine
906  *
907  * @ring: amdgpu_ring structure holding ring information
 * @timeout: timeout (in jiffies) to wait for the test fence
908  *
909  * Test a simple IB in the DMA ring (VEGA10).
910  * Returns 0 on success, error on failure.
911  */
912 static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
913 {
914         struct amdgpu_device *adev = ring->adev;
915         struct amdgpu_ib ib;
916         struct dma_fence *f = NULL;
917         unsigned index;
918         long r;
919         u32 tmp = 0;
920         u64 gpu_addr;
921
922         r = amdgpu_device_wb_get(adev, &index);
923         if (r) {
924                 dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
925                 return r;
926         }
927
928         gpu_addr = adev->wb.gpu_addr + (index * 4);
929         tmp = 0xCAFEDEAD;
930         adev->wb.wb[index] = cpu_to_le32(tmp);
931         memset(&ib, 0, sizeof(ib));
932         r = amdgpu_ib_get(adev, NULL, 256, &ib);
933         if (r) {
934                 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
935                 goto err0;
936         }
937
938         ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
939                 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
940         ib.ptr[1] = lower_32_bits(gpu_addr);
941         ib.ptr[2] = upper_32_bits(gpu_addr);
942         ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
943         ib.ptr[4] = 0xDEADBEEF;
944         ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
945         ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
946         ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
947         ib.length_dw = 8;
948
949         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
950         if (r)
951                 goto err1;
952
953         r = dma_fence_wait_timeout(f, false, timeout);
954         if (r == 0) {
955                 DRM_ERROR("amdgpu: IB test timed out\n");
956                 r = -ETIMEDOUT;
957                 goto err1;
958         } else if (r < 0) {
959                 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
960                 goto err1;
961         }
962         tmp = le32_to_cpu(adev->wb.wb[index]);
963         if (tmp == 0xDEADBEEF) {
964                 DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
965                 r = 0;
966         } else {
967                 DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
968                 r = -EINVAL;
969         }
970 err1:
971         amdgpu_ib_free(adev, &ib, NULL);
972         dma_fence_put(f);
973 err0:
974         amdgpu_device_wb_free(adev, index);
975         return r;
976 }
977
978
979 /**
980  * sdma_v4_0_vm_copy_pte - update PTEs by copying them from the GART
981  *
982  * @ib: indirect buffer to fill with commands
983  * @pe: addr of the page entry
984  * @src: src addr to copy from
985  * @count: number of page entries to update
986  *
987  * Update PTEs by copying them from the GART using sDMA (VEGA10).
988  */
989 static void sdma_v4_0_vm_copy_pte(struct amdgpu_ib *ib,
990                                   uint64_t pe, uint64_t src,
991                                   unsigned count)
992 {
993         unsigned bytes = count * 8;
994
995         ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
996                 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
997         ib->ptr[ib->length_dw++] = bytes - 1;
998         ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
999         ib->ptr[ib->length_dw++] = lower_32_bits(src);
1000         ib->ptr[ib->length_dw++] = upper_32_bits(src);
1001         ib->ptr[ib->length_dw++] = lower_32_bits(pe);
1002         ib->ptr[ib->length_dw++] = upper_32_bits(pe);
1003
1004 }
1005
1006 /**
1007  * sdma_v4_0_vm_write_pte - update PTEs by writing them manually
1008  *
1009  * @ib: indirect buffer to fill with commands
1010  * @pe: addr of the page entry
1011  * @value: value (typically an address) to write into the PTEs
1012  * @count: number of page entries to update
1013  * @incr: increase next value by incr bytes
1015  *
1016  * Update PTEs by writing them manually using sDMA (VEGA10).
1017  */
1018 static void sdma_v4_0_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
1019                                    uint64_t value, unsigned count,
1020                                    uint32_t incr)
1021 {
1022         unsigned ndw = count * 2;
1023
1024         ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
1025                 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
1026         ib->ptr[ib->length_dw++] = lower_32_bits(pe);
1027         ib->ptr[ib->length_dw++] = upper_32_bits(pe);
1028         ib->ptr[ib->length_dw++] = ndw - 1;
1029         for (; ndw > 0; ndw -= 2) {
1030                 ib->ptr[ib->length_dw++] = lower_32_bits(value);
1031                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
1032                 value += incr;
1033         }
1034 }
1035
1036 /**
1037  * sdma_v4_0_vm_set_pte_pde - update the page tables using sDMA
1038  *
1039  * @ib: indirect buffer to fill with commands
1040  * @pe: addr of the page entry
1041  * @addr: dst addr to write into pe
1042  * @count: number of page entries to update
1043  * @incr: increase next addr by incr bytes
1044  * @flags: access flags
1045  *
1046  * Update the page tables using sDMA (VEGA10).
1047  */
1048 static void sdma_v4_0_vm_set_pte_pde(struct amdgpu_ib *ib,
1049                                      uint64_t pe,
1050                                      uint64_t addr, unsigned count,
1051                                      uint32_t incr, uint64_t flags)
1052 {
1053         /* for physically contiguous pages (vram) */
1054         ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_PTEPDE);
1055         ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
1056         ib->ptr[ib->length_dw++] = upper_32_bits(pe);
1057         ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
1058         ib->ptr[ib->length_dw++] = upper_32_bits(flags);
1059         ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
1060         ib->ptr[ib->length_dw++] = upper_32_bits(addr);
1061         ib->ptr[ib->length_dw++] = incr; /* increment size */
1062         ib->ptr[ib->length_dw++] = 0;
1063         ib->ptr[ib->length_dw++] = count - 1; /* number of entries */
1064 }
1065
1066 /**
1067  * sdma_v4_0_ring_pad_ib - pad the IB to the required number of dw
1068  *
1069  * @ring: amdgpu_ring pointer
 * @ib: indirect buffer to fill with padding
1070  *
 * Pad the IB with NOPs so its size stays a multiple of 8 dwords.
1071  */
1072 static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
1073 {
1074         struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
1075         u32 pad_count;
1076         int i;
1077
1078         pad_count = (8 - (ib->length_dw & 0x7)) % 8;
1079         for (i = 0; i < pad_count; i++)
1080                 if (sdma && sdma->burst_nop && (i == 0))
1081                         ib->ptr[ib->length_dw++] =
1082                                 SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
1083                                 SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
1084                 else
1085                         ib->ptr[ib->length_dw++] =
1086                                 SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
1087 }
1088
1089
1090 /**
1091  * sdma_v4_0_ring_emit_pipeline_sync - sync the pipeline
1092  *
1093  * @ring: amdgpu_ring pointer
1094  *
1095  * Make sure all previous operations are completed (VEGA10).
1096  */
1097 static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
1098 {
1099         uint32_t seq = ring->fence_drv.sync_seq;
1100         uint64_t addr = ring->fence_drv.gpu_addr;
1101
1102         /* wait for idle */
1103         amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
1104                           SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
1105                           SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
1106                           SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
1107         amdgpu_ring_write(ring, addr & 0xfffffffc);
1108         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
1109         amdgpu_ring_write(ring, seq); /* reference */
1110         amdgpu_ring_write(ring, 0xfffffff); /* mask */
1111         amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
1112                           SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
1113 }
1114
1115
1116 /**
1117  * sdma_v4_0_ring_emit_vm_flush - vm flush using sDMA
1118  *
1119  * @ring: amdgpu_ring pointer
1120  * @vm: amdgpu_vm pointer
1121  *
1122  * Update the page table base and flush the VM TLB
1123  * using sDMA (VEGA10).
1124  */
1125 static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
1126                                          unsigned vmid, unsigned pasid,
1127                                          uint64_t pd_addr)
1128 {
1129         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pasid, pd_addr);
1130 }
1131
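/**
 * sdma_v4_0_ring_emit_wreg - emit a register write packet
 *
 * @ring: amdgpu_ring pointer
 * @reg: register offset to write
 * @val: value to write
 *
 * Emit an SRBM write packet to write @val to @reg (VEGA10).
 */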
1132 static void sdma_v4_0_ring_emit_wreg(struct amdgpu_ring *ring,
1133                                      uint32_t reg, uint32_t val)
1134 {
1135         amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
1136                           SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
1137         amdgpu_ring_write(ring, reg);
1138         amdgpu_ring_write(ring, val);
1139 }
1140
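/**
 * sdma_v4_0_ring_emit_reg_wait - emit a register wait packet
 *
 * @ring: amdgpu_ring pointer
 * @reg: register offset to poll
 * @val: reference value
 * @mask: mask applied to the register value
 *
 * Emit a POLL_REGMEM packet that waits until (@reg & @mask) == @val (VEGA10).
 */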
1141 static void sdma_v4_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
1142                                          uint32_t val, uint32_t mask)
1143 {
1144         amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
1145                           SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
1146                           SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
1147         amdgpu_ring_write(ring, reg << 2);
1148         amdgpu_ring_write(ring, 0);
1149         amdgpu_ring_write(ring, val); /* reference */
1150         amdgpu_ring_write(ring, mask); /* mask */
1151         amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
1152                           SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
1153 }
1154
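/**
 * sdma_v4_0_early_init - early IP block init
 *
 * @handle: amdgpu_device pointer
 *
 * Set the number of SDMA instances for the ASIC (one on RAVEN, two
 * otherwise) and hook up the ring, buffer, VM PTE and IRQ callbacks.
 */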
1155 static int sdma_v4_0_early_init(void *handle)
1156 {
1157         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1158
1159         if (adev->asic_type == CHIP_RAVEN)
1160                 adev->sdma.num_instances = 1;
1161         else
1162                 adev->sdma.num_instances = 2;
1163
1164         sdma_v4_0_set_ring_funcs(adev);
1165         sdma_v4_0_set_buffer_funcs(adev);
1166         sdma_v4_0_set_vm_pte_funcs(adev);
1167         sdma_v4_0_set_irq_funcs(adev);
1168
1169         return 0;
1170 }
1171
1172
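/**
 * sdma_v4_0_sw_init - software init
 *
 * @handle: amdgpu_device pointer
 *
 * Register the SDMA trap interrupt sources, load the microcode and
 * initialize the gfx ring of each SDMA instance.
 */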
1173 static int sdma_v4_0_sw_init(void *handle)
1174 {
1175         struct amdgpu_ring *ring;
1176         int r, i;
1177         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1178
1179         /* SDMA trap event */
1180         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_SDMA0, 224,
1181                               &adev->sdma.trap_irq);
1182         if (r)
1183                 return r;
1184
1185         /* SDMA trap event */
1186         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_SDMA1, 224,
1187                               &adev->sdma.trap_irq);
1188         if (r)
1189                 return r;
1190
1191         r = sdma_v4_0_init_microcode(adev);
1192         if (r) {
1193                 DRM_ERROR("Failed to load sdma firmware!\n");
1194                 return r;
1195         }
1196
1197         for (i = 0; i < adev->sdma.num_instances; i++) {
1198                 ring = &adev->sdma.instance[i].ring;
1199                 ring->ring_obj = NULL;
1200                 ring->use_doorbell = true;
1201
1202                 DRM_INFO("use_doorbell being set to: [%s]\n",
1203                                 ring->use_doorbell?"true":"false");
1204
1205                 ring->doorbell_index = (i == 0) ?
1206                         (AMDGPU_DOORBELL64_sDMA_ENGINE0 << 1) //get DWORD offset
1207                         : (AMDGPU_DOORBELL64_sDMA_ENGINE1 << 1); // get DWORD offset
1208
1209                 sprintf(ring->name, "sdma%d", i);
1210                 r = amdgpu_ring_init(adev, ring, 1024,
1211                                      &adev->sdma.trap_irq,
1212                                      (i == 0) ?
1213                                      AMDGPU_SDMA_IRQ_TRAP0 :
1214                                      AMDGPU_SDMA_IRQ_TRAP1);
1215                 if (r)
1216                         return r;
1217         }
1218
1219         return r;
1220 }
1221
1222 static int sdma_v4_0_sw_fini(void *handle)
1223 {
1224         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1225         int i;
1226
1227         for (i = 0; i < adev->sdma.num_instances; i++)
1228                 amdgpu_ring_fini(&adev->sdma.instance[i].ring);
1229
1230         for (i = 0; i < adev->sdma.num_instances; i++) {
1231                 release_firmware(adev->sdma.instance[i].fw);
1232                 adev->sdma.instance[i].fw = NULL;
1233         }
1234
1235         return 0;
1236 }
1237
1238 static int sdma_v4_0_hw_init(void *handle)
1239 {
1240         int r;
1241         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1242
1243         sdma_v4_0_init_golden_registers(adev);
1244
1245         r = sdma_v4_0_start(adev);
1246
1247         return r;
1248 }
1249
1250 static int sdma_v4_0_hw_fini(void *handle)
1251 {
1252         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1253
1254         if (amdgpu_sriov_vf(adev))
1255                 return 0;
1256
1257         sdma_v4_0_ctx_switch_enable(adev, false);
1258         sdma_v4_0_enable(adev, false);
1259
1260         return 0;
1261 }
1262
1263 static int sdma_v4_0_suspend(void *handle)
1264 {
1265         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1266
1267         return sdma_v4_0_hw_fini(adev);
1268 }
1269
1270 static int sdma_v4_0_resume(void *handle)
1271 {
1272         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1273
1274         return sdma_v4_0_hw_init(adev);
1275 }
1276
1277 static bool sdma_v4_0_is_idle(void *handle)
1278 {
1279         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1280         u32 i;
1281
1282         for (i = 0; i < adev->sdma.num_instances; i++) {
1283                 u32 tmp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_STATUS_REG));
1284
1285                 if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
1286                         return false;
1287         }
1288
1289         return true;
1290 }
1291
1292 static int sdma_v4_0_wait_for_idle(void *handle)
1293 {
1294         unsigned i;
1295         u32 sdma0, sdma1;
1296         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1297
1298         for (i = 0; i < adev->usec_timeout; i++) {
1299                 sdma0 = RREG32(sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG));
1300                 sdma1 = RREG32(sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_STATUS_REG));
1301
1302                 if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK)
1303                         return 0;
1304                 udelay(1);
1305         }
1306         return -ETIMEDOUT;
1307 }
1308
1309 static int sdma_v4_0_soft_reset(void *handle)
1310 {
1311         /* todo */
1312
1313         return 0;
1314 }
1315
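/**
 * sdma_v4_0_set_trap_irq_state - enable/disable the SDMA trap interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: IRQ source pointer
 * @type: which SDMA instance's trap interrupt to configure
 * @state: requested interrupt state
 *
 * Toggle the TRAP_ENABLE bit in the CNTL register of the SDMA
 * instance selected by @type.
 */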
1316 static int sdma_v4_0_set_trap_irq_state(struct amdgpu_device *adev,
1317                                         struct amdgpu_irq_src *source,
1318                                         unsigned type,
1319                                         enum amdgpu_interrupt_state state)
1320 {
1321         u32 sdma_cntl;
1322
1323         u32 reg_offset = (type == AMDGPU_SDMA_IRQ_TRAP0) ?
1324                 sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_CNTL) :
1325                 sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_CNTL);
1326
1327         sdma_cntl = RREG32(reg_offset);
1328         sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
1329                        state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
1330         WREG32(reg_offset, sdma_cntl);
1331
1332         return 0;
1333 }
1334
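/**
 * sdma_v4_0_process_trap_irq - handle an SDMA trap interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @entry: decoded interrupt vector entry
 *
 * Dispatch on client and ring id and run fence processing for the
 * gfx queue of the SDMA instance that raised the trap.
 */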
1335 static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev,
1336                                       struct amdgpu_irq_src *source,
1337                                       struct amdgpu_iv_entry *entry)
1338 {
1339         DRM_DEBUG("IH: SDMA trap\n");
1340         switch (entry->client_id) {
1341         case AMDGPU_IH_CLIENTID_SDMA0:
1342                 switch (entry->ring_id) {
1343                 case 0:
1344                         amdgpu_fence_process(&adev->sdma.instance[0].ring);
1345                         break;
1346                 case 1:
1347                         /* XXX compute */
1348                         break;
1349                 case 2:
1350                         /* XXX compute */
1351                         break;
1352                 case 3:
1353                         /* XXX page queue */
1354                         break;
1355                 }
1356                 break;
1357         case AMDGPU_IH_CLIENTID_SDMA1:
1358                 switch (entry->ring_id) {
1359                 case 0:
1360                         amdgpu_fence_process(&adev->sdma.instance[1].ring);
1361                         break;
1362                 case 1:
1363                         /* XXX compute */
1364                         break;
1365                 case 2:
1366                         /* XXX compute */
1367                         break;
1368                 case 3:
1369                         /* XXX page queue */
1370                         break;
1371                 }
1372                 break;
1373         }
1374         return 0;
1375 }
1376
1377 static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev,
1378                                               struct amdgpu_irq_src *source,
1379                                               struct amdgpu_iv_entry *entry)
1380 {
1381         DRM_ERROR("Illegal instruction in SDMA command stream\n");
1382         schedule_work(&adev->reset_work);
1383         return 0;
1384 }
1385
1386
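/**
 * sdma_v4_0_update_medium_grain_clock_gating - toggle SDMA MGCG
 *
 * @adev: amdgpu_device pointer
 * @enable: enable or disable medium grain clock gating
 *
 * Clear the SOFT_OVERRIDE bits in SDMA*_CLK_CTRL so medium grain
 * clock gating can take effect, or set them to force the clocks on.
 * SDMA1 is only programmed on VEGA10; RAVEN has a single instance.
 */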
1387 static void sdma_v4_0_update_medium_grain_clock_gating(
1388                 struct amdgpu_device *adev,
1389                 bool enable)
1390 {
1391         uint32_t data, def;
1392
1393         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
1394                 /* enable sdma0 clock gating */
1395                 def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL));
1396                 data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
1397                           SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
1398                           SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
1399                           SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
1400                           SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
1401                           SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
1402                           SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
1403                           SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
1404                 if (def != data)
1405                         WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), data);
1406
1407                 if (adev->asic_type == CHIP_VEGA10) {
1408                         def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL));
1409                         data &= ~(SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK |
1410                                   SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK |
1411                                   SDMA1_CLK_CTRL__SOFT_OVERRIDE5_MASK |
1412                                   SDMA1_CLK_CTRL__SOFT_OVERRIDE4_MASK |
1413                                   SDMA1_CLK_CTRL__SOFT_OVERRIDE3_MASK |
1414                                   SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK |
1415                                   SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK |
1416                                   SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK);
1417                         if (def != data)
1418                                 WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL), data);
1419                 }
1420         } else {
1421                 /* disable sdma0 clock gating */
1422                 def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL));
1423                 data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
1424                          SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
1425                          SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
1426                          SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
1427                          SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
1428                          SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
1429                          SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
1430                          SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
1431
1432                 if (def != data)
1433                         WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), data);
1434
1435                 if (adev->asic_type == CHIP_VEGA10) {
1436                         def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL));
1437                         data |= (SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK |
1438                                  SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK |
1439                                  SDMA1_CLK_CTRL__SOFT_OVERRIDE5_MASK |
1440                                  SDMA1_CLK_CTRL__SOFT_OVERRIDE4_MASK |
1441                                  SDMA1_CLK_CTRL__SOFT_OVERRIDE3_MASK |
1442                                  SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK |
1443                                  SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK |
1444                                  SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK);
1445                         if (def != data)
1446                                 WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL), data);
1447                 }
1448         }
1449 }
1450
1451
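/**
 * sdma_v4_0_update_medium_grain_light_sleep - toggle SDMA memory light sleep
 *
 * @adev: amdgpu_device pointer
 * @enable: enable or disable memory light sleep
 *
 * Set MEM_POWER_OVERRIDE in SDMA*_POWER_CNTL to allow the SDMA
 * memories to enter light sleep, or clear it to keep them powered.
 * As with MGCG, SDMA1 is only programmed on VEGA10.
 */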
1452 static void sdma_v4_0_update_medium_grain_light_sleep(
1453                 struct amdgpu_device *adev,
1454                 bool enable)
1455 {
1456         uint32_t data, def;
1457
1458         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) {
1459                 /* 1-not override: enable sdma0 mem light sleep */
1460                 def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL));
1461                 data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
1462                 if (def != data)
1463                         WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data);
1464
1465                 /* 1-not override: enable sdma1 mem light sleep */
1466                 if (adev->asic_type == CHIP_VEGA10) {
1467                         def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL));
1468                         data |= SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
1469                         if (def != data)
1470                                 WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), data);
1471                 }
1472         } else {
1473                 /* 0-override: disable sdma0 mem light sleep */
1474                 def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL));
1475                 data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
1476                 if (def != data)
1477                         WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data);
1478
1479                 /* 0-override: disable sdma1 mem light sleep */
1480                 if (adev->asic_type == CHIP_VEGA10) {
1481                         def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL));
1482                         data &= ~SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
1483                         if (def != data)
1484                                 WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), data);
1485                 }
1486         }
1487 }
1488
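/**
 * sdma_v4_0_set_clockgating_state - set the SDMA clockgating state
 *
 * @handle: amdgpu_device pointer
 * @state: AMD_CG_STATE_GATE or AMD_CG_STATE_UNGATE
 *
 * Apply the MGCG and light sleep settings on VEGA10 and RAVEN; a
 * no-op under SR-IOV.
 */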
1489 static int sdma_v4_0_set_clockgating_state(void *handle,
1490                                           enum amd_clockgating_state state)
1491 {
1492         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1493
1494         if (amdgpu_sriov_vf(adev))
1495                 return 0;
1496
1497         switch (adev->asic_type) {
1498         case CHIP_VEGA10:
1499         case CHIP_RAVEN:
1500                 sdma_v4_0_update_medium_grain_clock_gating(adev,
1501                                 state == AMD_CG_STATE_GATE);
1502                 sdma_v4_0_update_medium_grain_light_sleep(adev,
1503                                 state == AMD_CG_STATE_GATE);
1504                 break;
1505         default:
1506                 break;
1507         }
1508         return 0;
1509 }
1510
1511 static int sdma_v4_0_set_powergating_state(void *handle,
1512                                           enum amd_powergating_state state)
1513 {
1514         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1515
1516         switch (adev->asic_type) {
1517         case CHIP_RAVEN:
1518                 sdma_v4_1_update_power_gating(adev,
1519                                 state == AMD_PG_STATE_GATE);
1520                 break;
1521         default:
1522                 break;
1523         }
1524
1525         return 0;
1526 }
1527
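/**
 * sdma_v4_0_get_clockgating_state - report the current SDMA gating state
 *
 * @handle: amdgpu_device pointer
 * @flags: AMD_CG_SUPPORT_* bitmask to update
 *
 * Read SDMA0_CLK_CTRL and SDMA0_POWER_CNTL and set the MGCG and LS
 * flags that are currently active.
 */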
1528 static void sdma_v4_0_get_clockgating_state(void *handle, u32 *flags)
1529 {
1530         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1531         int data;
1532
1533         if (amdgpu_sriov_vf(adev))
1534                 *flags = 0;
1535
1536         /* AMD_CG_SUPPORT_SDMA_MGCG */
1537         data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL));
1538         if (!(data & SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK))
1539                 *flags |= AMD_CG_SUPPORT_SDMA_MGCG;
1540
1541         /* AMD_CG_SUPPORT_SDMA_LS */
1542         data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL));
1543         if (data & SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK)
1544                 *flags |= AMD_CG_SUPPORT_SDMA_LS;
1545 }
1546
1547 const struct amd_ip_funcs sdma_v4_0_ip_funcs = {
1548         .name = "sdma_v4_0",
1549         .early_init = sdma_v4_0_early_init,
1550         .late_init = NULL,
1551         .sw_init = sdma_v4_0_sw_init,
1552         .sw_fini = sdma_v4_0_sw_fini,
1553         .hw_init = sdma_v4_0_hw_init,
1554         .hw_fini = sdma_v4_0_hw_fini,
1555         .suspend = sdma_v4_0_suspend,
1556         .resume = sdma_v4_0_resume,
1557         .is_idle = sdma_v4_0_is_idle,
1558         .wait_for_idle = sdma_v4_0_wait_for_idle,
1559         .soft_reset = sdma_v4_0_soft_reset,
1560         .set_clockgating_state = sdma_v4_0_set_clockgating_state,
1561         .set_powergating_state = sdma_v4_0_set_powergating_state,
1562         .get_clockgating_state = sdma_v4_0_get_clockgating_state,
1563 };
1564
1565 static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
1566         .type = AMDGPU_RING_TYPE_SDMA,
1567         .align_mask = 0xf,
1568         .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
1569         .support_64bit_ptrs = true,
1570         .vmhub = AMDGPU_MMHUB,
1571         .get_rptr = sdma_v4_0_ring_get_rptr,
1572         .get_wptr = sdma_v4_0_ring_get_wptr,
1573         .set_wptr = sdma_v4_0_ring_set_wptr,
1574         .emit_frame_size =
1575                 6 + /* sdma_v4_0_ring_emit_hdp_flush */
1576                 3 + /* hdp invalidate */
1577                 6 + /* sdma_v4_0_ring_emit_pipeline_sync */
1578                 /* sdma_v4_0_ring_emit_vm_flush */
1579                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1580                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
1581                 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
1582         .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
1583         .emit_ib = sdma_v4_0_ring_emit_ib,
1584         .emit_fence = sdma_v4_0_ring_emit_fence,
1585         .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync,
1586         .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush,
1587         .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
1588         .test_ring = sdma_v4_0_ring_test_ring,
1589         .test_ib = sdma_v4_0_ring_test_ib,
1590         .insert_nop = sdma_v4_0_ring_insert_nop,
1591         .pad_ib = sdma_v4_0_ring_pad_ib,
1592         .emit_wreg = sdma_v4_0_ring_emit_wreg,
1593         .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
1594 };
1595
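/**
 * sdma_v4_0_set_ring_funcs - attach the ring callbacks
 *
 * @adev: amdgpu_device pointer
 *
 * Point every SDMA instance ring at the sdma_v4_0 ring functions.
 */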
1596 static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1597 {
1598         int i;
1599
1600         for (i = 0; i < adev->sdma.num_instances; i++)
1601                 adev->sdma.instance[i].ring.funcs = &sdma_v4_0_ring_funcs;
1602 }
1603
1604 static const struct amdgpu_irq_src_funcs sdma_v4_0_trap_irq_funcs = {
1605         .set = sdma_v4_0_set_trap_irq_state,
1606         .process = sdma_v4_0_process_trap_irq,
1607 };
1608
1609 static const struct amdgpu_irq_src_funcs sdma_v4_0_illegal_inst_irq_funcs = {
1610         .process = sdma_v4_0_process_illegal_inst_irq,
1611 };
1612
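/**
 * sdma_v4_0_set_irq_funcs - wire up the SDMA interrupt sources
 *
 * @adev: amdgpu_device pointer
 *
 * Register the trap and illegal instruction handlers defined above.
 */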
1613 static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1614 {
1615         adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
1616         adev->sdma.trap_irq.funcs = &sdma_v4_0_trap_irq_funcs;
1617         adev->sdma.illegal_inst_irq.funcs = &sdma_v4_0_illegal_inst_irq_funcs;
1618 }
1619
1620 /**
1621  * sdma_v4_0_emit_copy_buffer - copy buffer using the sDMA engine
1622  *
1623  * @ib: indirect buffer to copy to
1624  * @src_offset: src GPU address
1625  * @dst_offset: dst GPU address
1626  * @byte_count: number of bytes to xfer
1627  *
1628  * Copy GPU buffers using the DMA engine (VEGA10).
1629  * Used by the amdgpu ttm implementation to move pages if
1630  * registered as the asic copy callback.
1631  */
1632 static void sdma_v4_0_emit_copy_buffer(struct amdgpu_ib *ib,
1633                                        uint64_t src_offset,
1634                                        uint64_t dst_offset,
1635                                        uint32_t byte_count)
1636 {
1637         ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
1638                 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
1639         ib->ptr[ib->length_dw++] = byte_count - 1;
1640         ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
1641         ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
1642         ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
1643         ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
1644         ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
1645 }
1646
1647 /**
1648  * sdma_v4_0_emit_fill_buffer - fill buffer using the sDMA engine
1649  *
1650  * @ib: indirect buffer to fill
1651  * @src_data: value to write to buffer
1652  * @dst_offset: dst GPU address
1653  * @byte_count: number of bytes to xfer
1654  *
1655  * Fill GPU buffers using the DMA engine (VEGA10).
1656  */
1657 static void sdma_v4_0_emit_fill_buffer(struct amdgpu_ib *ib,
1658                                        uint32_t src_data,
1659                                        uint64_t dst_offset,
1660                                        uint32_t byte_count)
1661 {
1662         ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
1663         ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
1664         ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
1665         ib->ptr[ib->length_dw++] = src_data;
1666         ib->ptr[ib->length_dw++] = byte_count - 1;
1667 }
1668
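/*
 * Both the linear copy and the constant fill packets program
 * byte_count - 1, and requests larger than the 4 MB (0x400000)
 * limits below are split into multiple packets by the callers in
 * amdgpu_ttm.
 */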
1669 static const struct amdgpu_buffer_funcs sdma_v4_0_buffer_funcs = {
1670         .copy_max_bytes = 0x400000,
1671         .copy_num_dw = 7,
1672         .emit_copy_buffer = sdma_v4_0_emit_copy_buffer,
1673
1674         .fill_max_bytes = 0x400000,
1675         .fill_num_dw = 5,
1676         .emit_fill_buffer = sdma_v4_0_emit_fill_buffer,
1677 };
1678
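/**
 * sdma_v4_0_set_buffer_funcs - register SDMA as the buffer move engine
 *
 * @adev: amdgpu_device pointer
 *
 * Hook up SDMA instance 0 as the TTM copy/fill engine if no other IP
 * block has claimed it yet.
 */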
1679 static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev)
1680 {
1681         if (adev->mman.buffer_funcs == NULL) {
1682                 adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs;
1683                 adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
1684         }
1685 }
1686
1687 static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = {
1688         .copy_pte_num_dw = 7,
1689         .copy_pte = sdma_v4_0_vm_copy_pte,
1690
1691         .write_pte = sdma_v4_0_vm_write_pte,
1692         .set_pte_pde = sdma_v4_0_vm_set_pte_pde,
1693 };
1694
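/**
 * sdma_v4_0_set_vm_pte_funcs - register SDMA for VM page table updates
 *
 * @adev: amdgpu_device pointer
 *
 * Point the VM manager at the SDMA PTE helpers and hand it one ring
 * per SDMA instance, if nothing else has claimed the PTE funcs yet.
 */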
1695 static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev)
1696 {
1697         unsigned i;
1698
1699         if (adev->vm_manager.vm_pte_funcs == NULL) {
1700                 adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs;
1701                 for (i = 0; i < adev->sdma.num_instances; i++)
1702                         adev->vm_manager.vm_pte_rings[i] =
1703                                 &adev->sdma.instance[i].ring;
1704
1705                 adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
1706         }
1707 }
1708
1709 const struct amdgpu_ip_block_version sdma_v4_0_ip_block = {
1710         .type = AMD_IP_BLOCK_TYPE_SDMA,
1711         .major = 4,
1712         .minor = 0,
1713         .rev = 0,
1714         .funcs = &sdma_v4_0_ip_funcs,
1715 };