drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 #include "vega10_enum.h"
40 #include "hdp/hdp_4_0_offset.h"
41
43 #include "soc15_common.h"
44 #include "clearstate_gfx9.h"
45 #include "v9_structs.h"
46
47 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
48
49 #include "amdgpu_ras.h"
50
51 #define GFX9_NUM_GFX_RINGS     1
52 #define GFX9_MEC_HPD_SIZE 4096
53 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
54 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
55
56 #define mmPWR_MISC_CNTL_STATUS                                  0x0183
57 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX                         0
58 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT        0x0
59 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT          0x1
60 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK          0x00000001L
61 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK            0x00000006L
62
63 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
64 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
65 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
66 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
67 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
68 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
69
70 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
71 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
72 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
73 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
76
77 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
78 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
79 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
80 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
81 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
82 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
83
84 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
85 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
86 MODULE_FIRMWARE("amdgpu/raven_me.bin");
87 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
88 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
89 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
90
91 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
92 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
93 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
94 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
95 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
98
99 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
100 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
101 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
102 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
103 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
104 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
105 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
106
107 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
108 {
109         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
110         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
111         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
112         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
113         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
114         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
115         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
116         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
117         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
118         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
119         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
120         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
121         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
122         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
123         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
124         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
125         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
126         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
127         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
128         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
129 };
130
131 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
132 {
133         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
134         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
135         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
136         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
137         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
138         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
139         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
140         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
141         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
142         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
143         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
144         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
145         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
146         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
147         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
148         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
149         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
150         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
151 };
152
153 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
154 {
155         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
156         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
157         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
158         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
159         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
160         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
161         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
162         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
163         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
164         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
165         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
166 };
167
168 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
169 {
170         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
171         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
172         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
173         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
174         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
175         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
176         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
177         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
178         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
179         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
180         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
181         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
182         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
183         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
184         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
185         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
186         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
187         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
188         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
189         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
190         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
191         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
192         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
193         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
194 };
195
196 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
197 {
198         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
199         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
200         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
201         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
202         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
203         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
204         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
205 };
206
207 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
208 {
209         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
210         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
211         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
212         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
213         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
214         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
215         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
216         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
217         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
218         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
219         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
220         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
221         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
222         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
223         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
224         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
225         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
226         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
227         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
228 };
229
230 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
231 {
232         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
233         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
234         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
235 };
236
237 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
238 {
239         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
240         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
241         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
242         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
243         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
244         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
245         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
246         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
247         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
248         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
249         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
250         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
251         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
252         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
253         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
254         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
255 };
256
257 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
258 {
259         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
260         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
261         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
262         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
263         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
264         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
265         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
266         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
267         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
268         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
269         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
270         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
271         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
272 };
273
274 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
275 {
276         mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
277         mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
278         mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
279         mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
280         mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
281         mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
282         mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
283         mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
284 };
285
286 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
287 {
288         mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
289         mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
290         mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
291         mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
292         mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
293         mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
294         mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
295         mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
296 };
297
298 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
299 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
300 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
301 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
302
303 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
304 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
305 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
306 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
307 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
308                                  struct amdgpu_cu_info *cu_info);
309 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
310 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
311 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
312 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
313
314 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
315 {
316         switch (adev->asic_type) {
317         case CHIP_VEGA10:
318                 if (!amdgpu_virt_support_skip_setting(adev)) {
319                         soc15_program_register_sequence(adev,
320                                                          golden_settings_gc_9_0,
321                                                          ARRAY_SIZE(golden_settings_gc_9_0));
322                         soc15_program_register_sequence(adev,
323                                                          golden_settings_gc_9_0_vg10,
324                                                          ARRAY_SIZE(golden_settings_gc_9_0_vg10));
325                 }
326                 break;
327         case CHIP_VEGA12:
328                 soc15_program_register_sequence(adev,
329                                                 golden_settings_gc_9_2_1,
330                                                 ARRAY_SIZE(golden_settings_gc_9_2_1));
331                 soc15_program_register_sequence(adev,
332                                                 golden_settings_gc_9_2_1_vg12,
333                                                 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
334                 break;
335         case CHIP_VEGA20:
336                 soc15_program_register_sequence(adev,
337                                                 golden_settings_gc_9_0,
338                                                 ARRAY_SIZE(golden_settings_gc_9_0));
339                 soc15_program_register_sequence(adev,
340                                                 golden_settings_gc_9_0_vg20,
341                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg20));
342                 break;
343         case CHIP_RAVEN:
344                 soc15_program_register_sequence(adev, golden_settings_gc_9_1,
345                                                 ARRAY_SIZE(golden_settings_gc_9_1));
346                 if (adev->rev_id >= 8)
347                         soc15_program_register_sequence(adev,
348                                                         golden_settings_gc_9_1_rv2,
349                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv2));
350                 else
351                         soc15_program_register_sequence(adev,
352                                                         golden_settings_gc_9_1_rv1,
353                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv1));
354                 break;
355         default:
356                 break;
357         }
358
359         soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
360                                         (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
361 }
362
363 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
364 {
365         adev->gfx.scratch.num_reg = 8;
366         adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
367         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
368 }
369
370 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
371                                        bool wc, uint32_t reg, uint32_t val)
372 {
373         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
374         amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
375                                 WRITE_DATA_DST_SEL(0) |
376                                 (wc ? WR_CONFIRM : 0));
377         amdgpu_ring_write(ring, reg);
378         amdgpu_ring_write(ring, 0);
379         amdgpu_ring_write(ring, val);
380 }
381
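/*
 * Emit a WAIT_REG_MEM packet: poll a register (mem_space == 0) or a dword
 * in memory (mem_space == 1) addressed by addr0/addr1 until the value,
 * masked with @mask, equals @ref; @opt selects the wait operation and
 * @inv is the poll interval.
 */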
382 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
383                                   int mem_space, int opt, uint32_t addr0,
384                                   uint32_t addr1, uint32_t ref, uint32_t mask,
385                                   uint32_t inv)
386 {
387         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
388         amdgpu_ring_write(ring,
389                                  /* memory (1) or register (0) */
390                                  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
391                                  WAIT_REG_MEM_OPERATION(opt) | /* wait */
392                                  WAIT_REG_MEM_FUNCTION(3) |  /* equal */
393                                  WAIT_REG_MEM_ENGINE(eng_sel)));
394
395         if (mem_space)
396                 BUG_ON(addr0 & 0x3); /* Dword align */
397         amdgpu_ring_write(ring, addr0);
398         amdgpu_ring_write(ring, addr1);
399         amdgpu_ring_write(ring, ref);
400         amdgpu_ring_write(ring, mask);
401         amdgpu_ring_write(ring, inv); /* poll interval */
402 }
403
404 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
405 {
406         struct amdgpu_device *adev = ring->adev;
407         uint32_t scratch;
408         uint32_t tmp = 0;
409         unsigned i;
410         int r;
411
412         r = amdgpu_gfx_scratch_get(adev, &scratch);
413         if (r)
414                 return r;
415
416         WREG32(scratch, 0xCAFEDEAD);
417         r = amdgpu_ring_alloc(ring, 3);
418         if (r)
419                 goto error_free_scratch;
420
421         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
422         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
423         amdgpu_ring_write(ring, 0xDEADBEEF);
424         amdgpu_ring_commit(ring);
425
426         for (i = 0; i < adev->usec_timeout; i++) {
427                 tmp = RREG32(scratch);
428                 if (tmp == 0xDEADBEEF)
429                         break;
430                 udelay(1);
431         }
432
433         if (i >= adev->usec_timeout)
434                 r = -ETIMEDOUT;
435
436 error_free_scratch:
437         amdgpu_gfx_scratch_free(adev, scratch);
438         return r;
439 }
440
441 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
442 {
443         struct amdgpu_device *adev = ring->adev;
444         struct amdgpu_ib ib;
445         struct dma_fence *f = NULL;
446
447         unsigned index;
448         uint64_t gpu_addr;
449         uint32_t tmp;
450         long r;
451
452         r = amdgpu_device_wb_get(adev, &index);
453         if (r)
454                 return r;
455
456         gpu_addr = adev->wb.gpu_addr + (index * 4);
457         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
458         memset(&ib, 0, sizeof(ib));
459         r = amdgpu_ib_get(adev, NULL, 16, &ib);
460         if (r)
461                 goto err1;
462
463         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
464         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
465         ib.ptr[2] = lower_32_bits(gpu_addr);
466         ib.ptr[3] = upper_32_bits(gpu_addr);
467         ib.ptr[4] = 0xDEADBEEF;
468         ib.length_dw = 5;
469
470         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
471         if (r)
472                 goto err2;
473
474         r = dma_fence_wait_timeout(f, false, timeout);
475         if (r == 0) {
476                 r = -ETIMEDOUT;
477                 goto err2;
478         } else if (r < 0) {
479                 goto err2;
480         }
481
482         tmp = adev->wb.wb[index];
483         if (tmp == 0xDEADBEEF)
484                 r = 0;
485         else
486                 r = -EINVAL;
487
488 err2:
489         amdgpu_ib_free(adev, &ib, NULL);
490         dma_fence_put(f);
491 err1:
492         amdgpu_device_wb_free(adev, index);
493         return r;
494 }
495
496
497 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
498 {
499         release_firmware(adev->gfx.pfp_fw);
500         adev->gfx.pfp_fw = NULL;
501         release_firmware(adev->gfx.me_fw);
502         adev->gfx.me_fw = NULL;
503         release_firmware(adev->gfx.ce_fw);
504         adev->gfx.ce_fw = NULL;
505         release_firmware(adev->gfx.rlc_fw);
506         adev->gfx.rlc_fw = NULL;
507         release_firmware(adev->gfx.mec_fw);
508         adev->gfx.mec_fw = NULL;
509         release_firmware(adev->gfx.mec2_fw);
510         adev->gfx.mec2_fw = NULL;
511
512         kfree(adev->gfx.rlc.register_list_format);
513 }
514
515 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
516 {
517         const struct rlc_firmware_header_v2_1 *rlc_hdr;
518
519         rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
520         adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
521         adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
522         adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
523         adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
524         adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
525         adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
526         adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
527         adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
528         adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
529         adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
530         adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
531         adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
532         adev->gfx.rlc.reg_list_format_direct_reg_list_length =
533                         le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
534 }
535
536 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
537 {
538         adev->gfx.me_fw_write_wait = false;
539         adev->gfx.mec_fw_write_wait = false;
540
541         switch (adev->asic_type) {
542         case CHIP_VEGA10:
543                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
544                     (adev->gfx.me_feature_version >= 42) &&
545                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
546                     (adev->gfx.pfp_feature_version >= 42))
547                         adev->gfx.me_fw_write_wait = true;
548
549                 if ((adev->gfx.mec_fw_version >=  0x00000193) &&
550                     (adev->gfx.mec_feature_version >= 42))
551                         adev->gfx.mec_fw_write_wait = true;
552                 break;
553         case CHIP_VEGA12:
554                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
555                     (adev->gfx.me_feature_version >= 44) &&
556                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
557                     (adev->gfx.pfp_feature_version >= 44))
558                         adev->gfx.me_fw_write_wait = true;
559
560                 if ((adev->gfx.mec_fw_version >=  0x00000196) &&
561                     (adev->gfx.mec_feature_version >= 44))
562                         adev->gfx.mec_fw_write_wait = true;
563                 break;
564         case CHIP_VEGA20:
565                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
566                     (adev->gfx.me_feature_version >= 44) &&
567                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
568                     (adev->gfx.pfp_feature_version >= 44))
569                         adev->gfx.me_fw_write_wait = true;
570
571                 if ((adev->gfx.mec_fw_version >=  0x00000197) &&
572                     (adev->gfx.mec_feature_version >= 44))
573                         adev->gfx.mec_fw_write_wait = true;
574                 break;
575         case CHIP_RAVEN:
576                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
577                     (adev->gfx.me_feature_version >= 42) &&
578                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
579                     (adev->gfx.pfp_feature_version >= 42))
580                         adev->gfx.me_fw_write_wait = true;
581
582                 if ((adev->gfx.mec_fw_version >=  0x00000192) &&
583                     (adev->gfx.mec_feature_version >= 42))
584                         adev->gfx.mec_fw_write_wait = true;
585                 break;
586         default:
587                 break;
588         }
589 }
590
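/*
 * On original Raven (not Raven2 or Picasso), disable GFXOFF unless the RLC
 * firmware is a version known to support it; other ASICs are left untouched.
 */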
591 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
592 {
593         switch (adev->asic_type) {
594         case CHIP_VEGA10:
595         case CHIP_VEGA12:
596         case CHIP_VEGA20:
597                 break;
598         case CHIP_RAVEN:
599                 if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
600                         break;
601                 if ((adev->gfx.rlc_fw_version != 106 &&
602                      adev->gfx.rlc_fw_version < 531) ||
603                     (adev->gfx.rlc_fw_version == 53815) ||
604                     (adev->gfx.rlc_feature_version < 1) ||
605                     !adev->gfx.rlc.is_rlc_v2_1)
606                         adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
607                 break;
608         default:
609                 break;
610         }
611 }
612
613 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
614 {
615         const char *chip_name;
616         char fw_name[30];
617         int err;
618         struct amdgpu_firmware_info *info = NULL;
619         const struct common_firmware_header *header = NULL;
620         const struct gfx_firmware_header_v1_0 *cp_hdr;
621         const struct rlc_firmware_header_v2_0 *rlc_hdr;
622         unsigned int *tmp = NULL;
623         unsigned int i = 0;
624         uint16_t version_major;
625         uint16_t version_minor;
626         uint32_t smu_version;
627
628         DRM_DEBUG("\n");
629
630         switch (adev->asic_type) {
631         case CHIP_VEGA10:
632                 chip_name = "vega10";
633                 break;
634         case CHIP_VEGA12:
635                 chip_name = "vega12";
636                 break;
637         case CHIP_VEGA20:
638                 chip_name = "vega20";
639                 break;
640         case CHIP_RAVEN:
641                 if (adev->rev_id >= 8)
642                         chip_name = "raven2";
643                 else if (adev->pdev->device == 0x15d8)
644                         chip_name = "picasso";
645                 else
646                         chip_name = "raven";
647                 break;
648         default:
649                 BUG();
650         }
651
652         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
653         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
654         if (err)
655                 goto out;
656         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
657         if (err)
658                 goto out;
659         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
660         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
661         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
662
663         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
664         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
665         if (err)
666                 goto out;
667         err = amdgpu_ucode_validate(adev->gfx.me_fw);
668         if (err)
669                 goto out;
670         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
671         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
672         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
673
674         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
675         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
676         if (err)
677                 goto out;
678         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
679         if (err)
680                 goto out;
681         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
682         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
683         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
684
685         /*
686          * For Picasso on an AM4 socket board, we use picasso_rlc_am4.bin
687          * instead of picasso_rlc.bin.
688          * Detection:
689          * PCO AM4: revision >= 0xC8 && revision <= 0xCF
690          *          or revision >= 0xD8 && revision <= 0xDF
691          * otherwise it is PCO FP5
692          */
693         if (!strcmp(chip_name, "picasso") &&
694                 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
695                 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
696                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
697         else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
698                 (smu_version >= 0x41e2b))
699                 /*
700                  * SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
701                  */
702                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
703         else
704                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
705         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
706         if (err)
707                 goto out;
708         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
            if (err)
                    goto out;
709         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
710
711         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
712         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
713         if (version_major == 2 && version_minor == 1)
714                 adev->gfx.rlc.is_rlc_v2_1 = true;
715
716         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
717         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
718         adev->gfx.rlc.save_and_restore_offset =
719                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
720         adev->gfx.rlc.clear_state_descriptor_offset =
721                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
722         adev->gfx.rlc.avail_scratch_ram_locations =
723                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
724         adev->gfx.rlc.reg_restore_list_size =
725                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
726         adev->gfx.rlc.reg_list_format_start =
727                         le32_to_cpu(rlc_hdr->reg_list_format_start);
728         adev->gfx.rlc.reg_list_format_separate_start =
729                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
730         adev->gfx.rlc.starting_offsets_start =
731                         le32_to_cpu(rlc_hdr->starting_offsets_start);
732         adev->gfx.rlc.reg_list_format_size_bytes =
733                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
734         adev->gfx.rlc.reg_list_size_bytes =
735                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
736         adev->gfx.rlc.register_list_format =
737                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
738                                 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
739         if (!adev->gfx.rlc.register_list_format) {
740                 err = -ENOMEM;
741                 goto out;
742         }
743
744         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
745                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
746         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
747                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
748
749         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
750
751         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
752                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
753         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
754                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
755
756         if (adev->gfx.rlc.is_rlc_v2_1)
757                 gfx_v9_0_init_rlc_ext_microcode(adev);
758
759         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
760         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
761         if (err)
762                 goto out;
763         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
764         if (err)
765                 goto out;
766         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
767         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
768         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
769
770
771         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
772         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
773         if (!err) {
774                 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
775                 if (err)
776                         goto out;
777                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
778                                 adev->gfx.mec2_fw->data;
779                 adev->gfx.mec2_fw_version =
780                                 le32_to_cpu(cp_hdr->header.ucode_version);
781                 adev->gfx.mec2_feature_version =
782                                 le32_to_cpu(cp_hdr->ucode_feature_version);
783         } else {
784                 err = 0;
785                 adev->gfx.mec2_fw = NULL;
786         }
787
788         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
789                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
790                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
791                 info->fw = adev->gfx.pfp_fw;
792                 header = (const struct common_firmware_header *)info->fw->data;
793                 adev->firmware.fw_size +=
794                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
795
796                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
797                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
798                 info->fw = adev->gfx.me_fw;
799                 header = (const struct common_firmware_header *)info->fw->data;
800                 adev->firmware.fw_size +=
801                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
802
803                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
804                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
805                 info->fw = adev->gfx.ce_fw;
806                 header = (const struct common_firmware_header *)info->fw->data;
807                 adev->firmware.fw_size +=
808                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
809
810                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
811                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
812                 info->fw = adev->gfx.rlc_fw;
813                 header = (const struct common_firmware_header *)info->fw->data;
814                 adev->firmware.fw_size +=
815                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
816
817                 if (adev->gfx.rlc.is_rlc_v2_1 &&
818                     adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
819                     adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
820                     adev->gfx.rlc.save_restore_list_srm_size_bytes) {
821                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
822                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
823                         info->fw = adev->gfx.rlc_fw;
824                         adev->firmware.fw_size +=
825                                 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
826
827                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
828                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
829                         info->fw = adev->gfx.rlc_fw;
830                         adev->firmware.fw_size +=
831                                 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
832
833                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
834                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
835                         info->fw = adev->gfx.rlc_fw;
836                         adev->firmware.fw_size +=
837                                 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
838                 }
839
840                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
841                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
842                 info->fw = adev->gfx.mec_fw;
843                 header = (const struct common_firmware_header *)info->fw->data;
844                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
845                 adev->firmware.fw_size +=
846                         ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
847
848                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
849                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
850                 info->fw = adev->gfx.mec_fw;
851                 adev->firmware.fw_size +=
852                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
853
854                 if (adev->gfx.mec2_fw) {
855                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
856                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
857                         info->fw = adev->gfx.mec2_fw;
858                         header = (const struct common_firmware_header *)info->fw->data;
859                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
860                         adev->firmware.fw_size +=
861                                 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
862                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
863                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
864                         info->fw = adev->gfx.mec2_fw;
865                         adev->firmware.fw_size +=
866                                 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
867                 }
868
869         }
870
871 out:
872         gfx_v9_0_check_if_need_gfxoff(adev);
873         gfx_v9_0_check_fw_write_wait(adev);
874         if (err) {
875                 dev_err(adev->dev,
876                         "gfx9: Failed to load firmware \"%s\"\n",
877                         fw_name);
878                 release_firmware(adev->gfx.pfp_fw);
879                 adev->gfx.pfp_fw = NULL;
880                 release_firmware(adev->gfx.me_fw);
881                 adev->gfx.me_fw = NULL;
882                 release_firmware(adev->gfx.ce_fw);
883                 adev->gfx.ce_fw = NULL;
884                 release_firmware(adev->gfx.rlc_fw);
885                 adev->gfx.rlc_fw = NULL;
886                 release_firmware(adev->gfx.mec_fw);
887                 adev->gfx.mec_fw = NULL;
888                 release_firmware(adev->gfx.mec2_fw);
889                 adev->gfx.mec2_fw = NULL;
890         }
891         return err;
892 }
893
894 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
895 {
896         u32 count = 0;
897         const struct cs_section_def *sect = NULL;
898         const struct cs_extent_def *ext = NULL;
899
900         /* begin clear state */
901         count += 2;
902         /* context control state */
903         count += 3;
904
905         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
906                 for (ext = sect->section; ext->extent != NULL; ++ext) {
907                         if (sect->id == SECT_CONTEXT)
908                                 count += 2 + ext->reg_count;
909                         else
910                                 return 0;
911                 }
912         }
913
914         /* end clear state */
915         count += 2;
916         /* clear state */
917         count += 2;
918
919         return count;
920 }
921
922 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
923                                     volatile u32 *buffer)
924 {
925         u32 count = 0, i;
926         const struct cs_section_def *sect = NULL;
927         const struct cs_extent_def *ext = NULL;
928
929         if (adev->gfx.rlc.cs_data == NULL)
930                 return;
931         if (buffer == NULL)
932                 return;
933
934         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
935         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
936
937         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
938         buffer[count++] = cpu_to_le32(0x80000000);
939         buffer[count++] = cpu_to_le32(0x80000000);
940
941         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
942                 for (ext = sect->section; ext->extent != NULL; ++ext) {
943                         if (sect->id == SECT_CONTEXT) {
944                                 buffer[count++] =
945                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
946                                 buffer[count++] = cpu_to_le32(ext->reg_index -
947                                                 PACKET3_SET_CONTEXT_REG_START);
948                                 for (i = 0; i < ext->reg_count; i++)
949                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
950                         } else {
951                                 return;
952                         }
953                 }
954         }
955
956         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
957         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
958
959         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
960         buffer[count++] = cpu_to_le32(0);
961 }
962
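/*
 * Program the per-SE/SH always-on CU masks: 4 CUs on APUs, 8 on Vega12 and
 * 12 on other ASICs stay active (RLC_LB_ALWAYS_ACTIVE_CU_MASK), with the
 * first 2 CUs also recorded in RLC_PG_ALWAYS_ON_CU_MASK.
 */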
963 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
964 {
965         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
966         uint32_t pg_always_on_cu_num = 2;
967         uint32_t always_on_cu_num;
968         uint32_t i, j, k;
969         uint32_t mask, cu_bitmap, counter;
970
971         if (adev->flags & AMD_IS_APU)
972                 always_on_cu_num = 4;
973         else if (adev->asic_type == CHIP_VEGA12)
974                 always_on_cu_num = 8;
975         else
976                 always_on_cu_num = 12;
977
978         mutex_lock(&adev->grbm_idx_mutex);
979         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
980                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
981                         mask = 1;
982                         cu_bitmap = 0;
983                         counter = 0;
984                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
985
986                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
987                                 if (cu_info->bitmap[i][j] & mask) {
988                                         if (counter == pg_always_on_cu_num)
989                                                 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
990                                         if (counter < always_on_cu_num)
991                                                 cu_bitmap |= mask;
992                                         else
993                                                 break;
994                                         counter++;
995                                 }
996                                 mask <<= 1;
997                         }
998
999                         WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1000                         cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1001                 }
1002         }
1003         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1004         mutex_unlock(&adev->grbm_idx_mutex);
1005 }
1006
1007 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1008 {
1009         uint32_t data;
1010
1011         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1012         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1013         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1014         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1015         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1016
1017         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1018         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1019
1020         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1021         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1022
1023         mutex_lock(&adev->grbm_idx_mutex);
1024         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1025         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1026         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1027
1028         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1029         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1030         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1031         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1032         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1033
1034         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1035         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1036         data &= 0x0000FFFF;
1037         data |= 0x00C00000;
1038         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1039
1040         /*
1041          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1042          * programmed in gfx_v9_0_init_always_on_cu_mask()
1043          */
1044
1045         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved,
1046          * but is used here for RLC_LB_CNTL configuration */
1047         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1048         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1049         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1050         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1051         mutex_unlock(&adev->grbm_idx_mutex);
1052
1053         gfx_v9_0_init_always_on_cu_mask(adev);
1054 }
1055
1056 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1057 {
1058         uint32_t data;
1059
1060         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1061         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1062         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1063         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1064         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1065
1066         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1067         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1068
1069         /* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
1070         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1071
1072         mutex_lock(&adev->grbm_idx_mutex);
1073         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1074         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1075         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1076
1077         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1078         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1079         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1080         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1081         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1082
1083         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1084         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1085         data &= 0x0000FFFF;
1086         data |= 0x00C00000;
1087         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1088
1089         /*
1090          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1091          * programmed in gfx_v9_0_init_always_on_cu_mask()
1092          */
1093
1094         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved,
1095          * but is used here for RLC_LB_CNTL configuration */
1096         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1097         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1098         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1099         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1100         mutex_unlock(&adev->grbm_idx_mutex);
1101
1102         gfx_v9_0_init_always_on_cu_mask(adev);
1103 }
1104
1105 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1106 {
1107         WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1108 }
1109
1110 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1111 {
1112         return 5;
1113 }
1114
1115 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1116 {
1117         const struct cs_section_def *cs_data;
1118         int r;
1119
1120         adev->gfx.rlc.cs_data = gfx9_cs_data;
1121
1122         cs_data = adev->gfx.rlc.cs_data;
1123
1124         if (cs_data) {
1125                 /* init clear state block */
1126                 r = amdgpu_gfx_rlc_init_csb(adev);
1127                 if (r)
1128                         return r;
1129         }
1130
1131         if (adev->asic_type == CHIP_RAVEN) {
1132                 /* TODO: double check the cp_table_size for RV */
1133                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1134                 r = amdgpu_gfx_rlc_init_cpt(adev);
1135                 if (r)
1136                         return r;
1137         }
1138
1139         switch (adev->asic_type) {
1140         case CHIP_RAVEN:
1141                 gfx_v9_0_init_lbpw(adev);
1142                 break;
1143         case CHIP_VEGA20:
1144                 gfx_v9_4_init_lbpw(adev);
1145                 break;
1146         default:
1147                 break;
1148         }
1149
1150         return 0;
1151 }
1152
1153 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
1154 {
1155         int r;
1156
1157         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1158         if (unlikely(r != 0))
1159                 return r;
1160
1161         r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1162                         AMDGPU_GEM_DOMAIN_VRAM);
1163         if (!r)
1164                 adev->gfx.rlc.clear_state_gpu_addr =
1165                         amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1166
1167         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1168
1169         return r;
1170 }
1171
1172 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
1173 {
1174         int r;
1175
1176         if (!adev->gfx.rlc.clear_state_obj)
1177                 return;
1178
1179         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1180         if (likely(r == 0)) {
1181                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1182                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1183         }
1184 }
1185
1186 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1187 {
1188         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1189         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1190 }
1191
1192 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1193 {
1194         int r;
1195         u32 *hpd;
1196         const __le32 *fw_data;
1197         unsigned fw_size;
1198         u32 *fw;
1199         size_t mec_hpd_size;
1200
1201         const struct gfx_firmware_header_v1_0 *mec_hdr;
1202
1203         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1204
1205         /* take ownership of the relevant compute queues */
1206         amdgpu_gfx_compute_queue_acquire(adev);
1207         mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1208
1209         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1210                                       AMDGPU_GEM_DOMAIN_VRAM,
1211                                       &adev->gfx.mec.hpd_eop_obj,
1212                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1213                                       (void **)&hpd);
1214         if (r) {
1215                 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1216                 gfx_v9_0_mec_fini(adev);
1217                 return r;
1218         }
1219
1220         memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1221
1222         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1223         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1224
1225         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1226
1227         fw_data = (const __le32 *)
1228                 (adev->gfx.mec_fw->data +
1229                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1230         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1231
1232         r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1233                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1234                                       &adev->gfx.mec.mec_fw_obj,
1235                                       &adev->gfx.mec.mec_fw_gpu_addr,
1236                                       (void **)&fw);
1237         if (r) {
1238                 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1239                 gfx_v9_0_mec_fini(adev);
1240                 return r;
1241         }
1242
1243         memcpy(fw, fw_data, fw_size * 4); /* fw_size is in dwords */
1244
1245         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1246         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1247
1248         return 0;
1249 }
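/*
 * gfx_v9_0_mec_init() above carves one GFX9_MEC_HPD_SIZE slice per compute
 * ring out of the hpd_eop BO; gfx_v9_0_compute_ring_init() later points each
 * ring's eop_gpu_addr at its own slice:
 *
 *   eop_gpu_addr = hpd_eop_gpu_addr + ring_id * GFX9_MEC_HPD_SIZE;
 */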
1250
1251 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1252 {
1253         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1254                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1255                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1256                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1257                 (SQ_IND_INDEX__FORCE_READ_MASK));
1258         return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1259 }
1260
1261 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1262                            uint32_t wave, uint32_t thread,
1263                            uint32_t regno, uint32_t num, uint32_t *out)
1264 {
1265         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1266                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1267                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1268                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1269                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1270                 (SQ_IND_INDEX__FORCE_READ_MASK) |
1271                 (SQ_IND_INDEX__AUTO_INCR_MASK));
1272         while (num--)
1273                 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1274 }
1275
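/*
 * SQ_IND_INDEX/SQ_IND_DATA form an indirect index/data register pair:
 * wave_read_ind() performs a single indexed read, while wave_read_regs()
 * also sets AUTO_INCR so each successive read of SQ_IND_DATA returns the
 * next register, which is how the SGPR/VGPR dump helpers below stream a
 * whole range in one loop.
 */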
1276 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1277 {
1278         /* type 1 wave data */
1279         dst[(*no_fields)++] = 1;
1280         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1281         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1282         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1283         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1284         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1285         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1286         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1287         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1288         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1289         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1290         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1291         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1292         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1293         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1294 }
1295
1296 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1297                                      uint32_t wave, uint32_t start,
1298                                      uint32_t size, uint32_t *dst)
1299 {
1300         wave_read_regs(
1301                 adev, simd, wave, 0,
1302                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1303 }
1304
1305 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1306                                      uint32_t wave, uint32_t thread,
1307                                      uint32_t start, uint32_t size,
1308                                      uint32_t *dst)
1309 {
1310         wave_read_regs(
1311                 adev, simd, wave, thread,
1312                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1313 }
1314
1315 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1316                                   u32 me, u32 pipe, u32 q)
1317 {
1318         soc15_grbm_select(adev, me, pipe, q, 0);
1319 }
1320
1321 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1322         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1323         .select_se_sh = &gfx_v9_0_select_se_sh,
1324         .read_wave_data = &gfx_v9_0_read_wave_data,
1325         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1326         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1327         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
1328 };
1329
1330 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1331 {
1332         u32 gb_addr_config;
1333         int err;
1334
1335         adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1336
1337         switch (adev->asic_type) {
1338         case CHIP_VEGA10:
1339                 adev->gfx.config.max_hw_contexts = 8;
1340                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1341                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1342                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1343                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1344                 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1345                 break;
1346         case CHIP_VEGA12:
1347                 adev->gfx.config.max_hw_contexts = 8;
1348                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1349                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1350                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1351                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1352                 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1353                 DRM_INFO("fix gfx.config for vega12\n");
1354                 break;
1355         case CHIP_VEGA20:
1356                 adev->gfx.config.max_hw_contexts = 8;
1357                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1358                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1359                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1360                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1361                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1362                 gb_addr_config &= ~0xf3e777ff;
1363                 gb_addr_config |= 0x22014042;
1364                 /* check vbios table if gpu info is not available */
1365                 err = amdgpu_atomfirmware_get_gfx_info(adev);
1366                 if (err)
1367                         return err;
1368                 break;
1369         case CHIP_RAVEN:
1370                 adev->gfx.config.max_hw_contexts = 8;
1371                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1372                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1373                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1374                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1375                 if (adev->rev_id >= 8)
1376                         gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1377                 else
1378                         gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1379                 break;
1380         default:
1381                 BUG();
1382                 break;
1383         }
1384
1385         adev->gfx.config.gb_addr_config = gb_addr_config;
1386
1387         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1388                         REG_GET_FIELD(
1389                                         adev->gfx.config.gb_addr_config,
1390                                         GB_ADDR_CONFIG,
1391                                         NUM_PIPES);
1392
1393         adev->gfx.config.max_tile_pipes =
1394                 adev->gfx.config.gb_addr_config_fields.num_pipes;
1395
1396         adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1397                         REG_GET_FIELD(
1398                                         adev->gfx.config.gb_addr_config,
1399                                         GB_ADDR_CONFIG,
1400                                         NUM_BANKS);
1401         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1402                         REG_GET_FIELD(
1403                                         adev->gfx.config.gb_addr_config,
1404                                         GB_ADDR_CONFIG,
1405                                         MAX_COMPRESSED_FRAGS);
1406         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1407                         REG_GET_FIELD(
1408                                         adev->gfx.config.gb_addr_config,
1409                                         GB_ADDR_CONFIG,
1410                                         NUM_RB_PER_SE);
1411         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1412                         REG_GET_FIELD(
1413                                         adev->gfx.config.gb_addr_config,
1414                                         GB_ADDR_CONFIG,
1415                                         NUM_SHADER_ENGINES);
1416         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1417                         REG_GET_FIELD(
1418                                         adev->gfx.config.gb_addr_config,
1419                                         GB_ADDR_CONFIG,
1420                                         PIPE_INTERLEAVE_SIZE));
1421
1422         return 0;
1423 }
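/*
 * The GB_ADDR_CONFIG fields decoded above are log2 encoded. As a purely
 * illustrative example (field values not taken from any specific ASIC):
 *
 *   NUM_PIPES = 2            -> num_pipes            = 1 << 2       = 4
 *   NUM_SHADER_ENGINES = 2   -> num_se               = 1 << 2       = 4
 *   PIPE_INTERLEAVE_SIZE = 0 -> pipe_interleave_size = 1 << (8 + 0) = 256 bytes
 */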
1424
1425 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1426                                    struct amdgpu_ngg_buf *ngg_buf,
1427                                    int size_se,
1428                                    int default_size_se)
1429 {
1430         int r;
1431
1432         if (size_se < 0) {
1433                 dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1434                 return -EINVAL;
1435         }
1436         size_se = size_se ? size_se : default_size_se;
1437
1438         ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
1439         r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1440                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1441                                     &ngg_buf->bo,
1442                                     &ngg_buf->gpu_addr,
1443                                     NULL);
1444         if (r) {
1445                 dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1446                 return r;
1447         }
1448         ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
1449
1450         return r;
1451 }
1452
1453 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
1454 {
1455         int i;
1456
1457         for (i = 0; i < NGG_BUF_MAX; i++)
1458                 amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
1459                                       &adev->gfx.ngg.buf[i].gpu_addr,
1460                                       NULL);
1461
1462         memset(&adev->gfx.ngg.buf[0], 0,
1463                         sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
1464
1465         adev->gfx.ngg.init = false;
1466
1467         return 0;
1468 }
1469
1470 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
1471 {
1472         int r;
1473
1474         if (!amdgpu_ngg || adev->gfx.ngg.init == true)
1475                 return 0;
1476
1477         /* GDS reserve memory: 64 bytes alignment */
1478         adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
1479         adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
1480         adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
1481         adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
1482
1483         /* Primitive Buffer */
1484         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
1485                                     amdgpu_prim_buf_per_se,
1486                                     64 * 1024);
1487         if (r) {
1488                 dev_err(adev->dev, "Failed to create Primitive Buffer\n");
1489                 goto err;
1490         }
1491
1492         /* Position Buffer */
1493         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
1494                                     amdgpu_pos_buf_per_se,
1495                                     256 * 1024);
1496         if (r) {
1497                 dev_err(adev->dev, "Failed to create Position Buffer\n");
1498                 goto err;
1499         }
1500
1501         /* Control Sideband */
1502         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
1503                                     amdgpu_cntl_sb_buf_per_se,
1504                                     256);
1505         if (r) {
1506                 dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
1507                 goto err;
1508         }
1509
1510         /* Parameter Cache, not created by default */
1511         if (amdgpu_param_buf_per_se <= 0)
1512                 goto out;
1513
1514         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
1515                                     amdgpu_param_buf_per_se,
1516                                     512 * 1024);
1517         if (r) {
1518                 dev_err(adev->dev, "Failed to create Parameter Cache\n");
1519                 goto err;
1520         }
1521
1522 out:
1523         adev->gfx.ngg.init = true;
1524         return 0;
1525 err:
1526         gfx_v9_0_ngg_fini(adev);
1527         return r;
1528 }
1529
1530 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
1531 {
1532         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
1533         int r;
1534         u32 data, base;
1535
1536         if (!amdgpu_ngg)
1537                 return 0;
1538
1539         /* Program buffer size */
1540         data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
1541                              adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
1542         data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
1543                              adev->gfx.ngg.buf[NGG_POS].size >> 8);
1544         WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
1545
1546         data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
1547                              adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
1548         data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
1549                              adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
1550         WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
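        /*
         * Judging by the shifts above, the primitive/position/control-sideband
         * buffer sizes appear to be programmed in 256-byte units (>> 8) and
         * the parameter cache size in 1 KiB units (>> 10).
         */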
1551
1552         /* Program buffer base address */
1553         base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1554         data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
1555         WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
1556
1557         base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1558         data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
1559         WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
1560
1561         base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1562         data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
1563         WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
1564
1565         base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1566         data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
1567         WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
1568
1569         base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1570         data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
1571         WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
1572
1573         base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1574         data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
1575         WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
1576
1577         /* Clear GDS reserved memory */
1578         r = amdgpu_ring_alloc(ring, 17);
1579         if (r) {
1580                 DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
1581                           ring->name, r);
1582                 return r;
1583         }
1584
1585         gfx_v9_0_write_data_to_reg(ring, 0, false,
1586                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
1587                                    (adev->gds.gds_size +
1588                                     adev->gfx.ngg.gds_reserve_size));
1589
1590         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
1591         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
1592                                 PACKET3_DMA_DATA_DST_SEL(1) |
1593                                 PACKET3_DMA_DATA_SRC_SEL(2)));
1594         amdgpu_ring_write(ring, 0);
1595         amdgpu_ring_write(ring, 0);
1596         amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
1597         amdgpu_ring_write(ring, 0);
1598         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
1599                                 adev->gfx.ngg.gds_reserve_size);
1600
1601         gfx_v9_0_write_data_to_reg(ring, 0, false,
1602                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
1603
1604         amdgpu_ring_commit(ring);
1605
1606         return 0;
1607 }
1608
1609 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1610                                       int mec, int pipe, int queue)
1611 {
1612         int r;
1613         unsigned irq_type;
1614         struct amdgpu_ring *ring;
1615
1616         ring = &adev->gfx.compute_ring[ring_id];
1617
1618         /* mec0 is me1 */
1619         ring->me = mec + 1;
1620         ring->pipe = pipe;
1621         ring->queue = queue;
1622
1623         ring->ring_obj = NULL;
1624         ring->use_doorbell = true;
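        /*
         * adev->doorbell_index entries are kept in 64-bit doorbell units;
         * the << 1 below converts that to the 32-bit dword index used by
         * the ring code (each 64-bit doorbell spans two dwords).
         */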
1625         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
1626         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1627                                 + (ring_id * GFX9_MEC_HPD_SIZE);
1628         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1629
1630         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1631                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1632                 + ring->pipe;
1633
1634         /* type-2 packets are deprecated on MEC, use type-3 instead */
1635         r = amdgpu_ring_init(adev, ring, 1024,
1636                              &adev->gfx.eop_irq, irq_type);
1637         if (r)
1638                 return r;
1639
1640
1641         return 0;
1642 }
1643
1644 static int gfx_v9_0_sw_init(void *handle)
1645 {
1646         int i, j, k, r, ring_id;
1647         struct amdgpu_ring *ring;
1648         struct amdgpu_kiq *kiq;
1649         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1650
1651         switch (adev->asic_type) {
1652         case CHIP_VEGA10:
1653         case CHIP_VEGA12:
1654         case CHIP_VEGA20:
1655         case CHIP_RAVEN:
1656                 adev->gfx.mec.num_mec = 2;
1657                 break;
1658         default:
1659                 adev->gfx.mec.num_mec = 1;
1660                 break;
1661         }
1662
1663         adev->gfx.mec.num_pipe_per_mec = 4;
1664         adev->gfx.mec.num_queue_per_pipe = 8;
1665
1666         /* EOP Event */
1667         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
1668         if (r)
1669                 return r;
1670
1671         /* Privileged reg */
1672         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
1673                               &adev->gfx.priv_reg_irq);
1674         if (r)
1675                 return r;
1676
1677         /* Privileged inst */
1678         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
1679                               &adev->gfx.priv_inst_irq);
1680         if (r)
1681                 return r;
1682
1683         /* ECC error */
1684         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
1685                               &adev->gfx.cp_ecc_error_irq);
1686         if (r)
1687                 return r;
1688
1689         /* FUE error */
1690         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
1691                               &adev->gfx.cp_ecc_error_irq);
1692         if (r)
1693                 return r;
1694
1695         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1696
1697         gfx_v9_0_scratch_init(adev);
1698
1699         r = gfx_v9_0_init_microcode(adev);
1700         if (r) {
1701                 DRM_ERROR("Failed to load gfx firmware!\n");
1702                 return r;
1703         }
1704
1705         r = adev->gfx.rlc.funcs->init(adev);
1706         if (r) {
1707                 DRM_ERROR("Failed to init rlc BOs!\n");
1708                 return r;
1709         }
1710
1711         r = gfx_v9_0_mec_init(adev);
1712         if (r) {
1713                 DRM_ERROR("Failed to init MEC BOs!\n");
1714                 return r;
1715         }
1716
1717         /* set up the gfx ring */
1718         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1719                 ring = &adev->gfx.gfx_ring[i];
1720                 ring->ring_obj = NULL;
1721                 if (!i)
1722                         sprintf(ring->name, "gfx");
1723                 else
1724                         sprintf(ring->name, "gfx_%d", i);
1725                 ring->use_doorbell = true;
1726                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
1727                 r = amdgpu_ring_init(adev, ring, 1024,
1728                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
1729                 if (r)
1730                         return r;
1731         }
1732
1733         /* set up the compute queues - allocate horizontally across pipes */
1734         ring_id = 0;
1735         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1736                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1737                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1738                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
1739                                         continue;
1740
1741                                 r = gfx_v9_0_compute_ring_init(adev,
1742                                                                ring_id,
1743                                                                i, k, j);
1744                                 if (r)
1745                                         return r;
1746
1747                                 ring_id++;
1748                         }
1749                 }
1750         }
1751
1752         r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
1753         if (r) {
1754                 DRM_ERROR("Failed to init KIQ BOs!\n");
1755                 return r;
1756         }
1757
1758         kiq = &adev->gfx.kiq;
1759         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
1760         if (r)
1761                 return r;
1762
1763         /* create MQD for all compute queues as well as KIQ for SRIOV case */
1764         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
1765         if (r)
1766                 return r;
1767
1768         adev->gfx.ce_ram_size = 0x8000;
1769
1770         r = gfx_v9_0_gpu_early_init(adev);
1771         if (r)
1772                 return r;
1773
1774         r = gfx_v9_0_ngg_init(adev);
1775         if (r)
1776                 return r;
1777
1778         return 0;
1779 }
1780
1781
1782 static int gfx_v9_0_sw_fini(void *handle)
1783 {
1784         int i;
1785         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1786
1787         if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
1788                         adev->gfx.ras_if) {
1789                 struct ras_common_if *ras_if = adev->gfx.ras_if;
1790                 struct ras_ih_if ih_info = {
1791                         .head = *ras_if,
1792                 };
1793
1794                 amdgpu_ras_debugfs_remove(adev, ras_if);
1795                 amdgpu_ras_sysfs_remove(adev, ras_if);
1796                 amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
1797                 amdgpu_ras_feature_enable(adev, ras_if, 0);
1798                 kfree(ras_if);
1799         }
1800
1801         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1802                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1803         for (i = 0; i < adev->gfx.num_compute_rings; i++)
1804                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1805
1806         amdgpu_gfx_mqd_sw_fini(adev);
1807         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
1808         amdgpu_gfx_kiq_fini(adev);
1809
1810         gfx_v9_0_mec_fini(adev);
1811         gfx_v9_0_ngg_fini(adev);
1812         amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1813         if (adev->asic_type == CHIP_RAVEN) {
1814                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
1815                                 &adev->gfx.rlc.cp_table_gpu_addr,
1816                                 (void **)&adev->gfx.rlc.cp_table_ptr);
1817         }
1818         gfx_v9_0_free_microcode(adev);
1819
1820         return 0;
1821 }
1822
1823
1824 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
1825 {
1826         /* TODO */
1827 }
1828
1829 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
1830 {
1831         u32 data;
1832
1833         if (instance == 0xffffffff)
1834                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
1835         else
1836                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
1837
1838         if (se_num == 0xffffffff)
1839                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
1840         else
1841                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1842
1843         if (sh_num == 0xffffffff)
1844                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
1845         else
1846                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
1847
1848         WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
1849 }
1850
1851 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1852 {
1853         u32 data, mask;
1854
1855         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
1856         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
1857
1858         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1859         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1860
1861         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1862                                          adev->gfx.config.max_sh_per_se);
1863
1864         return (~data) & mask;
1865 }
1866
1867 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
1868 {
1869         int i, j;
1870         u32 data;
1871         u32 active_rbs = 0;
1872         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1873                                         adev->gfx.config.max_sh_per_se;
1874
1875         mutex_lock(&adev->grbm_idx_mutex);
1876         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1877                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1878                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1879                         data = gfx_v9_0_get_rb_active_bitmap(adev);
1880                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1881                                                rb_bitmap_width_per_sh);
1882                 }
1883         }
1884         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1885         mutex_unlock(&adev->grbm_idx_mutex);
1886
1887         adev->gfx.config.backend_enable_mask = active_rbs;
1888         adev->gfx.config.num_rbs = hweight32(active_rbs);
1889 }
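/*
 * Example of the packing done in gfx_v9_0_setup_rb(): with 4 shader engines,
 * 1 SH per SE and 4 render backends per SH (illustrative numbers only),
 * rb_bitmap_width_per_sh is 4, so SE0's active-RB bits land in bits 0-3,
 * SE1's in bits 4-7, and so on.
 */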
1890
1891 #define DEFAULT_SH_MEM_BASES    (0x6000)
1892 #define FIRST_COMPUTE_VMID      (8)
1893 #define LAST_COMPUTE_VMID       (16)
1894 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
1895 {
1896         int i;
1897         uint32_t sh_mem_config;
1898         uint32_t sh_mem_bases;
1899
1900         /*
1901          * Configure apertures:
1902          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
1903          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
1904          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
1905          */
1906         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
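        /*
         * SH_MEM_BASES packs PRIVATE_BASE in the low 16 bits and SHARED_BASE
         * in the high 16 bits, each in units of 2^48 bytes, so 0x6000 here
         * yields the 0x6000'0000'0000'0000 apertures described above.
         */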
1907
1908         sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
1909                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
1910                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
1911
1912         mutex_lock(&adev->srbm_mutex);
1913         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1914                 soc15_grbm_select(adev, 0, 0, 0, i);
1915                 /* CP and shaders */
1916                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
1917                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
1918         }
1919         soc15_grbm_select(adev, 0, 0, 0, 0);
1920         mutex_unlock(&adev->srbm_mutex);
1921 }
1922
1923 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
1924 {
1925         u32 tmp;
1926         int i;
1927
1928         WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
1929
1930         gfx_v9_0_tiling_mode_table_init(adev);
1931
1932         gfx_v9_0_setup_rb(adev);
1933         gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
1934         adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
1935
1936         /* XXX SH_MEM regs */
1937         /* where to put LDS, scratch, GPUVM in FSA64 space */
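        /*
         * VMID 0 (kernel driver/GPUVM) keeps SH_MEM_BASES at 0, while the
         * other VMIDs get the private/shared aperture bases.  In both cases
         * RETRY_DISABLE follows the amdgpu_noretry module parameter, i.e.
         * the noretry setting is applied uniformly here.
         */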
1938         mutex_lock(&adev->srbm_mutex);
1939         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) {
1940                 soc15_grbm_select(adev, 0, 0, 0, i);
1941                 /* CP and shaders */
1942                 if (i == 0) {
1943                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1944                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1945                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
1946                                             !!amdgpu_noretry);
1947                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
1948                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
1949                 } else {
1950                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1951                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1952                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
1953                                             !!amdgpu_noretry);
1954                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
1955                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
1956                                 (adev->gmc.private_aperture_start >> 48));
1957                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
1958                                 (adev->gmc.shared_aperture_start >> 48));
1959                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
1960                 }
1961         }
1962         soc15_grbm_select(adev, 0, 0, 0, 0);
1963
1964         mutex_unlock(&adev->srbm_mutex);
1965
1966         gfx_v9_0_init_compute_vmid(adev);
1967 }
1968
1969 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
1970 {
1971         u32 i, j, k;
1972         u32 mask;
1973
1974         mutex_lock(&adev->grbm_idx_mutex);
1975         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1976                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1977                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1978                         for (k = 0; k < adev->usec_timeout; k++) {
1979                                 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
1980                                         break;
1981                                 udelay(1);
1982                         }
1983                         if (k == adev->usec_timeout) {
1984                                 gfx_v9_0_select_se_sh(adev, 0xffffffff,
1985                                                       0xffffffff, 0xffffffff);
1986                                 mutex_unlock(&adev->grbm_idx_mutex);
1987                                 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
1988                                          i, j);
1989                                 return;
1990                         }
1991                 }
1992         }
1993         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1994         mutex_unlock(&adev->grbm_idx_mutex);
1995
1996         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
1997                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
1998                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
1999                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2000         for (k = 0; k < adev->usec_timeout; k++) {
2001                 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2002                         break;
2003                 udelay(1);
2004         }
2005 }
2006
2007 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2008                                                bool enable)
2009 {
2010         u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2011
2012         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2013         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2014         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2015         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2016
2017         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2018 }
2019
2020 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2021 {
2022         /* csib */
2023         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2024                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
2025         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2026                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2027         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2028                         adev->gfx.rlc.clear_state_size);
2029 }
2030
2031 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2032                                 int indirect_offset,
2033                                 int list_size,
2034                                 int *unique_indirect_regs,
2035                                 int unique_indirect_reg_count,
2036                                 int *indirect_start_offsets,
2037                                 int *indirect_start_offsets_count,
2038                                 int max_start_offsets_count)
2039 {
2040         int idx;
2041
2042         for (; indirect_offset < list_size; indirect_offset++) {
2043                 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2044                 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2045                 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2046
2047                 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2048                         indirect_offset += 2;
2049
2050                         /* look for the matching index */
2051                         for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2052                                 if (unique_indirect_regs[idx] ==
2053                                         register_list_format[indirect_offset] ||
2054                                         !unique_indirect_regs[idx])
2055                                         break;
2056                         }
2057
2058                         BUG_ON(idx >= unique_indirect_reg_count);
2059
2060                         if (!unique_indirect_regs[idx])
2061                                 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2062
2063                         indirect_offset++;
2064                 }
2065         }
2066 }
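/*
 * As the parser above implies, register_list_format holds a direct-register
 * portion followed by indirect runs, each a series of three-word entries
 * whose last word names an indirect register, with every run terminated by
 * an 0xFFFFFFFF marker; gfx_v9_1_init_rlc_save_restore_list() below walks
 * the same layout when loading the list into RLC scratch RAM.
 */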
2067
2068 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2069 {
2070         int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2071         int unique_indirect_reg_count = 0;
2072
2073         int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2074         int indirect_start_offsets_count = 0;
2075
2076         int list_size = 0;
2077         int i = 0, j = 0;
2078         u32 tmp = 0;
2079
2080         u32 *register_list_format =
2081                 kmemdup(adev->gfx.rlc.register_list_format,
2082                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2083         if (!register_list_format)
2084                 return -ENOMEM;
2085
2086         /* setup unique_indirect_regs array and indirect_start_offsets array */
2087         unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2088         gfx_v9_1_parse_ind_reg_list(register_list_format,
2089                                     adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2090                                     adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2091                                     unique_indirect_regs,
2092                                     unique_indirect_reg_count,
2093                                     indirect_start_offsets,
2094                                     &indirect_start_offsets_count,
2095                                     ARRAY_SIZE(indirect_start_offsets));
2096
2097         /* enable auto inc in case it is disabled */
2098         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2099         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2100         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2101
2102         /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2103         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2104                 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2105         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2106                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2107                         adev->gfx.rlc.register_restore[i]);
2108
2109         /* load indirect register */
2110         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2111                 adev->gfx.rlc.reg_list_format_start);
2112
2113         /* direct register portion */
2114         for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2115                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2116                         register_list_format[i]);
2117
2118         /* indirect register portion */
2119         while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2120                 if (register_list_format[i] == 0xFFFFFFFF) {
2121                         WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2122                         continue;
2123                 }
2124
2125                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2126                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2127
2128                 for (j = 0; j < unique_indirect_reg_count; j++) {
2129                         if (register_list_format[i] == unique_indirect_regs[j]) {
2130                                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2131                                 break;
2132                         }
2133                 }
2134
2135                 BUG_ON(j >= unique_indirect_reg_count);
2136
2137                 i++;
2138         }
2139
2140         /* set save/restore list size */
2141         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2142         list_size = list_size >> 1;
2143         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2144                 adev->gfx.rlc.reg_restore_list_size);
2145         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2146
2147         /* write the starting offsets to RLC scratch ram */
2148         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2149                 adev->gfx.rlc.starting_offsets_start);
2150         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2151                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2152                        indirect_start_offsets[i]);
2153
2154         /* load unique indirect regs*/
2155         for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2156                 if (unique_indirect_regs[i] != 0) {
2157                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2158                                + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2159                                unique_indirect_regs[i] & 0x3FFFF);
2160
2161                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2162                                + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2163                                unique_indirect_regs[i] >> 20);
2164                 }
2165         }
2166
2167         kfree(register_list_format);
2168         return 0;
2169 }
2170
2171 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2172 {
2173         WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2174 }
2175
2176 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2177                                              bool enable)
2178 {
2179         uint32_t data = 0;
2180         uint32_t default_data = 0;
2181
2182         default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2183         if (enable == true) {
2184                 /* enable GFXIP control over CGPG */
2185                 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2186                 if(default_data != data)
2187                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2188
2189                 /* update status */
2190                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2191                 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2192                 if(default_data != data)
2193                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2194         } else {
2195                 /* restore GFXIP control over CGPG */
2196                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2197                 if(default_data != data)
2198                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2199         }
2200 }
2201
2202 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2203 {
2204         uint32_t data = 0;
2205
2206         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2207                               AMD_PG_SUPPORT_GFX_SMG |
2208                               AMD_PG_SUPPORT_GFX_DMG)) {
2209                 /* init IDLE_POLL_COUNT = 60 */
2210                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2211                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2212                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2213                 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2214
2215                 /* init RLC PG Delay */
2216                 data = 0;
2217                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2218                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2219                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2220                 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2221                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2222
2223                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2224                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2225                 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2226                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2227
2228                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2229                 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2230                 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2231                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2232
2233                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2234                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2235
2236                 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2237                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2238                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2239
2240                 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2241         }
2242 }
2243
2244 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2245                                                 bool enable)
2246 {
2247         uint32_t data = 0;
2248         uint32_t default_data = 0;
2249
2250         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2251         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2252                              SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2253                              enable ? 1 : 0);
2254         if (default_data != data)
2255                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2256 }
2257
2258 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2259                                                 bool enable)
2260 {
2261         uint32_t data = 0;
2262         uint32_t default_data = 0;
2263
2264         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2265         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2266                              SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2267                              enable ? 1 : 0);
2268         if(default_data != data)
2269                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2270 }
2271
2272 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2273                                         bool enable)
2274 {
2275         uint32_t data = 0;
2276         uint32_t default_data = 0;
2277
2278         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2279         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2280                              CP_PG_DISABLE,
2281                              enable ? 0 : 1);
2282         if(default_data != data)
2283                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2284 }
2285
2286 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2287                                                 bool enable)
2288 {
2289         uint32_t data, default_data;
2290
2291         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2292         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2293                              GFX_POWER_GATING_ENABLE,
2294                              enable ? 1 : 0);
2295         if(default_data != data)
2296                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2297 }
2298
2299 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2300                                                 bool enable)
2301 {
2302         uint32_t data, default_data;
2303
2304         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2305         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2306                              GFX_PIPELINE_PG_ENABLE,
2307                              enable ? 1 : 0);
2308         if(default_data != data)
2309                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2310
2311         if (!enable)
2312                 /* read any GFX register to wake up GFX */
2313                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2314 }
2315
2316 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2317                                                        bool enable)
2318 {
2319         uint32_t data, default_data;
2320
2321         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2322         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2323                              STATIC_PER_CU_PG_ENABLE,
2324                              enable ? 1 : 0);
2325         if(default_data != data)
2326                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2327 }
2328
2329 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2330                                                 bool enable)
2331 {
2332         uint32_t data, default_data;
2333
2334         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2335         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2336                              DYN_PER_CU_PG_ENABLE,
2337                              enable ? 1 : 0);
2338         if(default_data != data)
2339                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2340 }
2341
2342 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2343 {
2344         gfx_v9_0_init_csb(adev);
2345
2346         /*
2347          * Rlc save restore list is workable since v2_1.
2348          * And it's needed by gfxoff feature.
2349          */
2350         if (adev->gfx.rlc.is_rlc_v2_1) {
2351                 gfx_v9_1_init_rlc_save_restore_list(adev);
2352                 gfx_v9_0_enable_save_restore_machine(adev);
2353         }
2354
2355         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2356                               AMD_PG_SUPPORT_GFX_SMG |
2357                               AMD_PG_SUPPORT_GFX_DMG |
2358                               AMD_PG_SUPPORT_CP |
2359                               AMD_PG_SUPPORT_GDS |
2360                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
2361                 WREG32(mmRLC_JUMP_TABLE_RESTORE,
2362                        adev->gfx.rlc.cp_table_gpu_addr >> 8);
2363                 gfx_v9_0_init_gfx_power_gating(adev);
2364         }
2365 }
2366
2367 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2368 {
2369         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2370         gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2371         gfx_v9_0_wait_for_rlc_serdes(adev);
2372 }
2373
2374 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2375 {
2376         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2377         udelay(50);
2378         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2379         udelay(50);
2380 }
2381
2382 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2383 {
2384 #ifdef AMDGPU_RLC_DEBUG_RETRY
2385         u32 rlc_ucode_ver;
2386 #endif
2387
2388         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2389         udelay(50);
2390
2391         /* carrizo: enable the cp interrupt only after cp is initialized */
2392         if (!(adev->flags & AMD_IS_APU)) {
2393                 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2394                 udelay(50);
2395         }
2396
2397 #ifdef AMDGPU_RLC_DEBUG_RETRY
2398         /* RLC_GPM_GENERAL_6 : RLC Ucode version */
2399         rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2400         if(rlc_ucode_ver == 0x108) {
2401                 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2402                                 rlc_ucode_ver, adev->gfx.rlc_fw_version);
2403                 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2404                  * default is 0x9C4 to create a 100us interval */
2405                 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2406                 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2407                  * to disable the page fault retry interrupts, default is
2408                  * 0x100 (256) */
2409                 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2410         }
2411 #endif
2412 }
2413
2414 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2415 {
2416         const struct rlc_firmware_header_v2_0 *hdr;
2417         const __le32 *fw_data;
2418         unsigned i, fw_size;
2419
2420         if (!adev->gfx.rlc_fw)
2421                 return -EINVAL;
2422
2423         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2424         amdgpu_ucode_print_rlc_hdr(&hdr->header);
2425
2426         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2427                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2428         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2429
2430         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2431                         RLCG_UCODE_LOADING_START_ADDRESS);
2432         for (i = 0; i < fw_size; i++)
2433                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2434         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2435
2436         return 0;
2437 }
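/*
 * Note on the legacy load above: RLC_GPM_UCODE_ADDR is first pointed at
 * RLCG_UCODE_LOADING_START_ADDRESS and each write to RLC_GPM_UCODE_DATA then
 * appears to auto-increment that address while the image is streamed in one
 * dword at a time.  The trailing write of adev->gfx.rlc_fw_version back to
 * RLC_GPM_UCODE_ADDR mirrors the other microcode loaders in this file and is
 * presumably consumed as a version handshake rather than as a real address.
 */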
2438
2439 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2440 {
2441         int r;
2442
2443         if (amdgpu_sriov_vf(adev)) {
2444                 gfx_v9_0_init_csb(adev);
2445                 return 0;
2446         }
2447
2448         adev->gfx.rlc.funcs->stop(adev);
2449
2450         /* disable CG */
2451         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2452
2453         gfx_v9_0_init_pg(adev);
2454
2455         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2456                 /* legacy rlc firmware loading */
2457                 r = gfx_v9_0_rlc_load_microcode(adev);
2458                 if (r)
2459                         return r;
2460         }
2461
2462         switch (adev->asic_type) {
2463         case CHIP_RAVEN:
2464                 if (amdgpu_lbpw == 0)
2465                         gfx_v9_0_enable_lbpw(adev, false);
2466                 else
2467                         gfx_v9_0_enable_lbpw(adev, true);
2468                 break;
2469         case CHIP_VEGA20:
2470                 if (amdgpu_lbpw > 0)
2471                         gfx_v9_0_enable_lbpw(adev, true);
2472                 else
2473                         gfx_v9_0_enable_lbpw(adev, false);
2474                 break;
2475         default:
2476                 break;
2477         }
2478
2479         adev->gfx.rlc.funcs->start(adev);
2480
2481         return 0;
2482 }
2483
2484 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2485 {
2486         int i;
2487         u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2488
2489         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2490         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2491         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2492         if (!enable) {
2493                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2494                         adev->gfx.gfx_ring[i].sched.ready = false;
2495         }
2496         WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2497         udelay(50);
2498 }
2499
2500 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2501 {
2502         const struct gfx_firmware_header_v1_0 *pfp_hdr;
2503         const struct gfx_firmware_header_v1_0 *ce_hdr;
2504         const struct gfx_firmware_header_v1_0 *me_hdr;
2505         const __le32 *fw_data;
2506         unsigned i, fw_size;
2507
2508         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2509                 return -EINVAL;
2510
2511         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2512                 adev->gfx.pfp_fw->data;
2513         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2514                 adev->gfx.ce_fw->data;
2515         me_hdr = (const struct gfx_firmware_header_v1_0 *)
2516                 adev->gfx.me_fw->data;
2517
2518         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2519         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2520         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2521
2522         gfx_v9_0_cp_gfx_enable(adev, false);
2523
2524         /* PFP */
2525         fw_data = (const __le32 *)
2526                 (adev->gfx.pfp_fw->data +
2527                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2528         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2529         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
2530         for (i = 0; i < fw_size; i++)
2531                 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2532         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2533
2534         /* CE */
2535         fw_data = (const __le32 *)
2536                 (adev->gfx.ce_fw->data +
2537                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2538         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2539         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
2540         for (i = 0; i < fw_size; i++)
2541                 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2542         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2543
2544         /* ME */
2545         fw_data = (const __le32 *)
2546                 (adev->gfx.me_fw->data +
2547                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2548         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2549         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
2550         for (i = 0; i < fw_size; i++)
2551                 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2552         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2553
2554         return 0;
2555 }
2556
2557 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
2558 {
2559         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2560         const struct cs_section_def *sect = NULL;
2561         const struct cs_extent_def *ext = NULL;
2562         int r, i, tmp;
2563
2564         /* init the CP */
2565         WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2566         WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
2567
2568         gfx_v9_0_cp_gfx_enable(adev, true);
2569
2570         r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
2571         if (r) {
2572                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2573                 return r;
2574         }
2575
2576         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2577         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2578
2579         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2580         amdgpu_ring_write(ring, 0x80000000);
2581         amdgpu_ring_write(ring, 0x80000000);
2582
2583         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
2584                 for (ext = sect->section; ext->extent != NULL; ++ext) {
2585                         if (sect->id == SECT_CONTEXT) {
2586                                 amdgpu_ring_write(ring,
2587                                        PACKET3(PACKET3_SET_CONTEXT_REG,
2588                                                ext->reg_count));
2589                                 amdgpu_ring_write(ring,
2590                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2591                                 for (i = 0; i < ext->reg_count; i++)
2592                                         amdgpu_ring_write(ring, ext->extent[i]);
2593                         }
2594                 }
2595         }
2596
2597         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2598         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2599
2600         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2601         amdgpu_ring_write(ring, 0);
2602
2603         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2604         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2605         amdgpu_ring_write(ring, 0x8000);
2606         amdgpu_ring_write(ring, 0x8000);
2607
2608         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2609         tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
2610                 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
2611         amdgpu_ring_write(ring, tmp);
2612         amdgpu_ring_write(ring, 0);
2613
2614         amdgpu_ring_commit(ring);
2615
2616         return 0;
2617 }
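/*
 * The packets emitted above are the usual clear-state bring-up for the gfx
 * ring: a PREAMBLE begin/end pair wrapping SET_CONTEXT_REG writes taken from
 * the gfx9_cs_data golden context tables, then CLEAR_STATE, a SET_BASE for
 * the CE partition and a VGT_INDEX_TYPE reset.  The extra "+ 4 + 3" in the
 * ring allocation presumably accounts for the SET_BASE (4 dwords) and
 * SET_UCONFIG_REG (3 dwords) packets that are not counted by
 * gfx_v9_0_get_csb_size().
 */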
2618
2619 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
2620 {
2621         struct amdgpu_ring *ring;
2622         u32 tmp;
2623         u32 rb_bufsz;
2624         u64 rb_addr, rptr_addr, wptr_gpu_addr;
2625
2626         /* Set the write pointer delay */
2627         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
2628
2629         /* set the RB to use vmid 0 */
2630         WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
2631
2632         /* Set ring buffer size */
2633         ring = &adev->gfx.gfx_ring[0];
2634         rb_bufsz = order_base_2(ring->ring_size / 8);
2635         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
2636         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
2637 #ifdef __BIG_ENDIAN
2638         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
2639 #endif
2640         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
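        /*
         * Worked example of the encoding above: a 64 KiB ring gives
         * rb_bufsz = order_base_2(65536 / 8) = 13, so RB_BUFSZ is written as
         * 13 and RB_BLKSZ as 11.  The /8 scaling simply follows this
         * function's convention; the exact hardware interpretation of the
         * log2-encoded fields is not spelled out here.
         */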
2641
2642         /* Initialize the ring buffer's write pointers */
2643         ring->wptr = 0;
2644         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
2645         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
2646
2647         /* set the wb address whether it's enabled or not */
2648         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2649         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
2650         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
2651
2652         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2653         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
2654         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
2655
2656         mdelay(1);
2657         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2658
2659         rb_addr = ring->gpu_addr >> 8;
2660         WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
2661         WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
2662
2663         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
2664         if (ring->use_doorbell) {
2665                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2666                                     DOORBELL_OFFSET, ring->doorbell_index);
2667                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2668                                     DOORBELL_EN, 1);
2669         } else {
2670                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
2671         }
2672         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
2673
2674         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
2675                         DOORBELL_RANGE_LOWER, ring->doorbell_index);
2676         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
2677
2678         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
2679                        CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
2680
2681
2682         /* start the ring */
2683         gfx_v9_0_cp_gfx_start(adev);
2684         ring->sched.ready = true;
2685
2686         return 0;
2687 }
2688
2689 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
2690 {
2691         int i;
2692
2693         if (enable) {
2694                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
2695         } else {
2696                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
2697                         (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
2698                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2699                         adev->gfx.compute_ring[i].sched.ready = false;
2700                 adev->gfx.kiq.ring.sched.ready = false;
2701         }
2702         udelay(50);
2703 }
2704
2705 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
2706 {
2707         const struct gfx_firmware_header_v1_0 *mec_hdr;
2708         const __le32 *fw_data;
2709         unsigned i;
2710         u32 tmp;
2711
2712         if (!adev->gfx.mec_fw)
2713                 return -EINVAL;
2714
2715         gfx_v9_0_cp_compute_enable(adev, false);
2716
2717         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2718         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
2719
2720         fw_data = (const __le32 *)
2721                 (adev->gfx.mec_fw->data +
2722                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2723         tmp = 0;
2724         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2725         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2726         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
2727
2728         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
2729                 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
2730         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
2731                 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
2732
2733         /* MEC1 */
2734         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2735                          mec_hdr->jt_offset);
2736         for (i = 0; i < mec_hdr->jt_size; i++)
2737                 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
2738                         le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
2739
2740         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2741                         adev->gfx.mec_fw_version);
2742         /* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
2743
2744         return 0;
2745 }
2746
2747 /* KIQ functions */
2748 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
2749 {
2750         uint32_t tmp;
2751         struct amdgpu_device *adev = ring->adev;
2752
2753         /* tell RLC which is KIQ queue */
2754         tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
2755         tmp &= 0xffffff00;
2756         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
2757         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2758         tmp |= 0x80;
2759         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2760 }
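/*
 * From the shifts above, the low byte of RLC_CP_SCHEDULERS selects which
 * hardware queue acts as the KIQ: queue in bits [2:0], pipe in bits [4:3] and
 * ME in bit 5, with bit 7 (0x80) set by the second write.  Bit 7 is
 * presumably an enable/commit bit for the scheduler entry; the field names
 * are not visible in this file, so that reading is inferred from the code.
 */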
2761
2762 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
2763 {
2764         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
2765         uint64_t queue_mask = 0;
2766         int r, i;
2767
2768         for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
2769                 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
2770                         continue;
2771
2772                 /* This situation may be hit in the future if a new HW
2773                  * generation exposes more than 64 queues. If so, the
2774                  * definition of queue_mask needs updating */
2775                 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
2776                         DRM_ERROR("Invalid KCQ enabled: %d\n", i);
2777                         break;
2778                 }
2779
2780                 queue_mask |= (1ull << i);
2781         }
2782
2783         r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
2784         if (r) {
2785                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
2786                 return r;
2787         }
2788
2789         /* set resources */
2790         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
2791         amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
2792                           PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
2793         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
2794         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
2795         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
2796         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
2797         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
2798         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
2799         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2800                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
2801                 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
2802                 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2803
2804                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
2805                 /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
2806                 amdgpu_ring_write(kiq_ring,
2807                                   PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
2808                                   PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
2809                                   PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
2810                                   PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
2811                                   PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
2812                                   PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
2813                                   PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
2814                                   PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
2815                                   PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
2816                 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
2817                 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
2818                 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
2819                 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
2820                 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
2821         }
2822
2823         r = amdgpu_ring_test_helper(kiq_ring);
2824         if (r)
2825                 DRM_ERROR("KCQ enable failed\n");
2826
2827         return r;
2828 }
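/*
 * Summary of the bring-up above: one SET_RESOURCES packet hands the KIQ the
 * bitmap of MEC queue slots it may schedule (queue_mask), and one MAP_QUEUES
 * packet per compute ring then points the KIQ at that ring's MQD and wptr
 * poll address, keyed by doorbell offset.  The ring allocation of
 * (7 * num_compute_rings) + 8 dwords matches exactly: 8 dwords for
 * SET_RESOURCES (header + 7 payload) and 7 dwords per MAP_QUEUES
 * (header + 6 payload).
 */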
2829
2830 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
2831 {
2832         struct amdgpu_device *adev = ring->adev;
2833         struct v9_mqd *mqd = ring->mqd_ptr;
2834         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
2835         uint32_t tmp;
2836
2837         mqd->header = 0xC0310800;
2838         mqd->compute_pipelinestat_enable = 0x00000001;
2839         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
2840         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
2841         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
2842         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
2843         mqd->compute_misc_reserved = 0x00000003;
2844
2845         mqd->dynamic_cu_mask_addr_lo =
2846                 lower_32_bits(ring->mqd_gpu_addr
2847                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2848         mqd->dynamic_cu_mask_addr_hi =
2849                 upper_32_bits(ring->mqd_gpu_addr
2850                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2851
2852         eop_base_addr = ring->eop_gpu_addr >> 8;
2853         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
2854         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
2855
2856         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2857         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
2858         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
2859                         (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
2860
2861         mqd->cp_hqd_eop_control = tmp;
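        /*
         * Worked example: GFX9_MEC_HPD_SIZE is 4096 bytes, i.e. 1024 dwords,
         * so order_base_2(1024) - 1 = 9 is programmed and the hardware sees
         * 2^(9+1) = 1024 dwords, matching the comment above.
         */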
2862
2863         /* enable doorbell? */
2864         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2865
2866         if (ring->use_doorbell) {
2867                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2868                                     DOORBELL_OFFSET, ring->doorbell_index);
2869                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2870                                     DOORBELL_EN, 1);
2871                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2872                                     DOORBELL_SOURCE, 0);
2873                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2874                                     DOORBELL_HIT, 0);
2875         } else {
2876                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2877                                          DOORBELL_EN, 0);
2878         }
2879
2880         mqd->cp_hqd_pq_doorbell_control = tmp;
2881
2882         /* disable the queue if it's active */
2883         ring->wptr = 0;
2884         mqd->cp_hqd_dequeue_request = 0;
2885         mqd->cp_hqd_pq_rptr = 0;
2886         mqd->cp_hqd_pq_wptr_lo = 0;
2887         mqd->cp_hqd_pq_wptr_hi = 0;
2888
2889         /* set the pointer to the MQD */
2890         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
2891         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
2892
2893         /* set MQD vmid to 0 */
2894         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
2895         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
2896         mqd->cp_mqd_control = tmp;
2897
2898         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
2899         hqd_gpu_addr = ring->gpu_addr >> 8;
2900         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
2901         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
2902
2903         /* set up the HQD, this is similar to CP_RB0_CNTL */
2904         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
2905         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
2906                             (order_base_2(ring->ring_size / 4) - 1));
2907         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
2908                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
2909 #ifdef __BIG_ENDIAN
2910         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
2911 #endif
2912         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
2913         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
2914         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
2915         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
2916         mqd->cp_hqd_pq_control = tmp;
2917
2918         /* set the wb address whether it's enabled or not */
2919         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2920         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
2921         mqd->cp_hqd_pq_rptr_report_addr_hi =
2922                 upper_32_bits(wb_gpu_addr) & 0xffff;
2923
2924         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
2925         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2926         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
2927         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
2928
2929         tmp = 0;
2930         /* enable the doorbell if requested */
2931         if (ring->use_doorbell) {
2932                 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2933                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2934                                 DOORBELL_OFFSET, ring->doorbell_index);
2935
2936                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2937                                          DOORBELL_EN, 1);
2938                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2939                                          DOORBELL_SOURCE, 0);
2940                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2941                                          DOORBELL_HIT, 0);
2942         }
2943
2944         mqd->cp_hqd_pq_doorbell_control = tmp;
2945
2946         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2947         ring->wptr = 0;
2948         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
2949
2950         /* set the vmid for the queue */
2951         mqd->cp_hqd_vmid = 0;
2952
2953         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
2954         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
2955         mqd->cp_hqd_persistent_state = tmp;
2956
2957         /* set MIN_IB_AVAIL_SIZE */
2958         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
2959         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
2960         mqd->cp_hqd_ib_control = tmp;
2961
2962         /* activate the queue */
2963         mqd->cp_hqd_active = 1;
2964
2965         return 0;
2966 }
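/*
 * The structure filled in above is the MQD (memory queue descriptor): an
 * in-memory image of a compute queue's HQD register state covering the EOP
 * buffer, doorbell control, MQD/ring base addresses and the rptr report and
 * wptr poll addresses.  Keeping the state in memory lets it be saved,
 * restored and handed to the hardware without writing the HQD registers from
 * this function.
 */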
2967
2968 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
2969 {
2970         struct amdgpu_device *adev = ring->adev;
2971         struct v9_mqd *mqd = ring->mqd_ptr;
2972         int j;
2973
2974         /* disable wptr polling */
2975         WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
2976
2977         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
2978                mqd->cp_hqd_eop_base_addr_lo);
2979         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
2980                mqd->cp_hqd_eop_base_addr_hi);
2981
2982         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2983         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
2984                mqd->cp_hqd_eop_control);
2985
2986         /* enable doorbell? */
2987         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
2988                mqd->cp_hqd_pq_doorbell_control);
2989
2990         /* disable the queue if it's active */
2991         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
2992                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
2993                 for (j = 0; j < adev->usec_timeout; j++) {
2994                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
2995                                 break;
2996                         udelay(1);
2997                 }
2998                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
2999                        mqd->cp_hqd_dequeue_request);
3000                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3001                        mqd->cp_hqd_pq_rptr);
3002                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3003                        mqd->cp_hqd_pq_wptr_lo);
3004                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3005                        mqd->cp_hqd_pq_wptr_hi);
3006         }
3007
3008         /* set the pointer to the MQD */
3009         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3010                mqd->cp_mqd_base_addr_lo);
3011         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3012                mqd->cp_mqd_base_addr_hi);
3013
3014         /* set MQD vmid to 0 */
3015         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3016                mqd->cp_mqd_control);
3017
3018         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3019         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3020                mqd->cp_hqd_pq_base_lo);
3021         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3022                mqd->cp_hqd_pq_base_hi);
3023
3024         /* set up the HQD, this is similar to CP_RB0_CNTL */
3025         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3026                mqd->cp_hqd_pq_control);
3027
3028         /* set the wb address whether it's enabled or not */
3029         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3030                                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3031         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3032                                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3033
3034         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3035         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3036                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3037         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3038                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3039
3040         /* enable the doorbell if requested */
3041         if (ring->use_doorbell) {
3042                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3043                                         (adev->doorbell_index.kiq * 2) << 2);
3044                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3045                                         (adev->doorbell_index.userqueue_end * 2) << 2);
3046         }
3047
3048         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3049                mqd->cp_hqd_pq_doorbell_control);
3050
3051         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3052         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3053                mqd->cp_hqd_pq_wptr_lo);
3054         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3055                mqd->cp_hqd_pq_wptr_hi);
3056
3057         /* set the vmid for the queue */
3058         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3059
3060         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3061                mqd->cp_hqd_persistent_state);
3062
3063         /* activate the queue */
3064         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3065                mqd->cp_hqd_active);
3066
3067         if (ring->use_doorbell)
3068                 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3069
3070         return 0;
3071 }
3072
3073 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3074 {
3075         struct amdgpu_device *adev = ring->adev;
3076         int j;
3077
3078         /* disable the queue if it's active */
3079         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3080
3081                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3082
3083                 for (j = 0; j < adev->usec_timeout; j++) {
3084                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3085                                 break;
3086                         udelay(1);
3087                 }
3088
3089                 if (j == adev->usec_timeout) {
3090                         DRM_DEBUG("KIQ dequeue request failed.\n");
3091
3092                         /* Manual disable if dequeue request times out */
3093                         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3094                 }
3095
3096                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3097                       0);
3098         }
3099
3100         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3101         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3102         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3103         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3104         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3105         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3106         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3107         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3108
3109         return 0;
3110 }
3111
3112 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3113 {
3114         struct amdgpu_device *adev = ring->adev;
3115         struct v9_mqd *mqd = ring->mqd_ptr;
3116         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3117
3118         gfx_v9_0_kiq_setting(ring);
3119
3120         if (adev->in_gpu_reset) { /* for GPU_RESET case */
3121                 /* reset MQD to a clean status */
3122                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3123                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3124
3125                 /* reset ring buffer */
3126                 ring->wptr = 0;
3127                 amdgpu_ring_clear_ring(ring);
3128
3129                 mutex_lock(&adev->srbm_mutex);
3130                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3131                 gfx_v9_0_kiq_init_register(ring);
3132                 soc15_grbm_select(adev, 0, 0, 0, 0);
3133                 mutex_unlock(&adev->srbm_mutex);
3134         } else {
3135                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3136                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3137                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3138                 mutex_lock(&adev->srbm_mutex);
3139                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3140                 gfx_v9_0_mqd_init(ring);
3141                 gfx_v9_0_kiq_init_register(ring);
3142                 soc15_grbm_select(adev, 0, 0, 0, 0);
3143                 mutex_unlock(&adev->srbm_mutex);
3144
3145                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3146                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3147         }
3148
3149         return 0;
3150 }
3151
3152 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3153 {
3154         struct amdgpu_device *adev = ring->adev;
3155         struct v9_mqd *mqd = ring->mqd_ptr;
3156         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3157
3158         if (!adev->in_gpu_reset && !adev->in_suspend) {
3159                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3160                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3161                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3162                 mutex_lock(&adev->srbm_mutex);
3163                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3164                 gfx_v9_0_mqd_init(ring);
3165                 soc15_grbm_select(adev, 0, 0, 0, 0);
3166                 mutex_unlock(&adev->srbm_mutex);
3167
3168                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3169                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3170         } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3171                 /* reset MQD to a clean status */
3172                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3173                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3174
3175                 /* reset ring buffer */
3176                 ring->wptr = 0;
3177                 amdgpu_ring_clear_ring(ring);
3178         } else {
3179                 amdgpu_ring_clear_ring(ring);
3180         }
3181
3182         return 0;
3183 }
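/*
 * Contrast with gfx_v9_0_kiq_init_queue() above: the KIQ programs its HQD
 * registers directly under srbm_mutex, while the KCQs only build their MQD
 * image here (or restore it from mqd_backup on GPU reset) and rely on the
 * KIQ's MAP_QUEUES path in gfx_v9_0_kiq_kcq_enable() to activate them.
 */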
3184
3185 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3186 {
3187         struct amdgpu_ring *ring;
3188         int r;
3189
3190         ring = &adev->gfx.kiq.ring;
3191
3192         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3193         if (unlikely(r != 0))
3194                 return r;
3195
3196         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3197         if (unlikely(r != 0))
3198                 return r;
3199
3200         gfx_v9_0_kiq_init_queue(ring);
3201         amdgpu_bo_kunmap(ring->mqd_obj);
3202         ring->mqd_ptr = NULL;
3203         amdgpu_bo_unreserve(ring->mqd_obj);
3204         ring->sched.ready = true;
3205         return 0;
3206 }
3207
3208 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3209 {
3210         struct amdgpu_ring *ring = NULL;
3211         int r = 0, i;
3212
3213         gfx_v9_0_cp_compute_enable(adev, true);
3214
3215         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3216                 ring = &adev->gfx.compute_ring[i];
3217
3218                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3219                 if (unlikely(r != 0))
3220                         goto done;
3221                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3222                 if (!r) {
3223                         r = gfx_v9_0_kcq_init_queue(ring);
3224                         amdgpu_bo_kunmap(ring->mqd_obj);
3225                         ring->mqd_ptr = NULL;
3226                 }
3227                 amdgpu_bo_unreserve(ring->mqd_obj);
3228                 if (r)
3229                         goto done;
3230         }
3231
3232         r = gfx_v9_0_kiq_kcq_enable(adev);
3233 done:
3234         return r;
3235 }
3236
3237 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3238 {
3239         int r, i;
3240         struct amdgpu_ring *ring;
3241
3242         if (!(adev->flags & AMD_IS_APU))
3243                 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3244
3245         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3246                 /* legacy firmware loading */
3247                 r = gfx_v9_0_cp_gfx_load_microcode(adev);
3248                 if (r)
3249                         return r;
3250
3251                 r = gfx_v9_0_cp_compute_load_microcode(adev);
3252                 if (r)
3253                         return r;
3254         }
3255
3256         r = gfx_v9_0_kiq_resume(adev);
3257         if (r)
3258                 return r;
3259
3260         r = gfx_v9_0_cp_gfx_resume(adev);
3261         if (r)
3262                 return r;
3263
3264         r = gfx_v9_0_kcq_resume(adev);
3265         if (r)
3266                 return r;
3267
3268         ring = &adev->gfx.gfx_ring[0];
3269         r = amdgpu_ring_test_helper(ring);
3270         if (r)
3271                 return r;
3272
3273         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3274                 ring = &adev->gfx.compute_ring[i];
3275                 amdgpu_ring_test_helper(ring);
3276         }
3277
3278         gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3279
3280         return 0;
3281 }
3282
3283 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3284 {
3285         gfx_v9_0_cp_gfx_enable(adev, enable);
3286         gfx_v9_0_cp_compute_enable(adev, enable);
3287 }
3288
3289 static int gfx_v9_0_hw_init(void *handle)
3290 {
3291         int r;
3292         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3293
3294         gfx_v9_0_init_golden_registers(adev);
3295
3296         gfx_v9_0_constants_init(adev);
3297
3298         r = gfx_v9_0_csb_vram_pin(adev);
3299         if (r)
3300                 return r;
3301
3302         r = adev->gfx.rlc.funcs->resume(adev);
3303         if (r)
3304                 return r;
3305
3306         r = gfx_v9_0_cp_resume(adev);
3307         if (r)
3308                 return r;
3309
3310         r = gfx_v9_0_ngg_en(adev);
3311         if (r)
3312                 return r;
3313
3314         return r;
3315 }
3316
3317 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3318 {
3319         int r, i;
3320         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3321
3322         r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3323         if (r)
3324                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3325
3326         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3327                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3328
3329                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3330                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3331                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3332                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3333                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3334                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3335                 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3336                 amdgpu_ring_write(kiq_ring, 0);
3337                 amdgpu_ring_write(kiq_ring, 0);
3338                 amdgpu_ring_write(kiq_ring, 0);
3339         }
3340         r = amdgpu_ring_test_helper(kiq_ring);
3341         if (r)
3342                 DRM_ERROR("KCQ disable failed\n");
3343
3344         return r;
3345 }
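/*
 * Teardown mirror of gfx_v9_0_kiq_kcq_enable(): each compute queue is removed
 * with an UNMAP_QUEUES packet using the RESET_QUEUES action, addressed by its
 * doorbell offset.  The 6 * num_compute_rings ring allocation matches the
 * 6 dwords (header + 5 payload) written per queue.
 */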
3346
3347 static int gfx_v9_0_hw_fini(void *handle)
3348 {
3349         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3350
3351         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3352         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3353         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3354
3355         /* disable the KCQs so the CPC does not touch memory that is no longer valid */
3356         gfx_v9_0_kcq_disable(adev);
3357
3358         if (amdgpu_sriov_vf(adev)) {
3359                 gfx_v9_0_cp_gfx_enable(adev, false);
3360                 /* Polling must be disabled for SRIOV once the hardware is finished;
3361                  * otherwise the CPC engine may keep fetching the WB address, which is
3362                  * already invalid after the software side has finished, and trigger a
3363                  * DMAR read error on the hypervisor side.
3364                  */
3365                 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3366                 return 0;
3367         }
3368
3369         /* Use the deinitialize sequence from CAIL when unbinding the device from the
3370          * driver, otherwise the KIQ hangs when the device is bound back.
3371          */
3372         if (!adev->in_gpu_reset && !adev->in_suspend) {
3373                 mutex_lock(&adev->srbm_mutex);
3374                 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3375                                 adev->gfx.kiq.ring.pipe,
3376                                 adev->gfx.kiq.ring.queue, 0);
3377                 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3378                 soc15_grbm_select(adev, 0, 0, 0, 0);
3379                 mutex_unlock(&adev->srbm_mutex);
3380         }
3381
3382         gfx_v9_0_cp_enable(adev, false);
3383         adev->gfx.rlc.funcs->stop(adev);
3384
3385         gfx_v9_0_csb_vram_unpin(adev);
3386
3387         return 0;
3388 }
3389
3390 static int gfx_v9_0_suspend(void *handle)
3391 {
3392         return gfx_v9_0_hw_fini(handle);
3393 }
3394
3395 static int gfx_v9_0_resume(void *handle)
3396 {
3397         return gfx_v9_0_hw_init(handle);
3398 }
3399
3400 static bool gfx_v9_0_is_idle(void *handle)
3401 {
3402         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3403
3404         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3405                                 GRBM_STATUS, GUI_ACTIVE))
3406                 return false;
3407         else
3408                 return true;
3409 }
3410
3411 static int gfx_v9_0_wait_for_idle(void *handle)
3412 {
3413         unsigned i;
3414         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3415
3416         for (i = 0; i < adev->usec_timeout; i++) {
3417                 if (gfx_v9_0_is_idle(handle))
3418                         return 0;
3419                 udelay(1);
3420         }
3421         return -ETIMEDOUT;
3422 }
3423
3424 static int gfx_v9_0_soft_reset(void *handle)
3425 {
3426         u32 grbm_soft_reset = 0;
3427         u32 tmp;
3428         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3429
3430         /* GRBM_STATUS */
3431         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3432         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3433                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3434                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3435                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3436                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3437                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3438                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3439                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3440                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3441                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3442         }
3443
3444         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3445                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3446                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3447         }
3448
3449         /* GRBM_STATUS2 */
3450         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3451         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3452                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3453                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3454
3455
3456         if (grbm_soft_reset) {
3457                 /* stop the rlc */
3458                 adev->gfx.rlc.funcs->stop(adev);
3459
3460                 /* Disable GFX parsing/prefetching */
3461                 gfx_v9_0_cp_gfx_enable(adev, false);
3462
3463                 /* Disable MEC parsing/prefetching */
3464                 gfx_v9_0_cp_compute_enable(adev, false);
3465
3466                 if (grbm_soft_reset) {
3467                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3468                         tmp |= grbm_soft_reset;
3469                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3470                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3471                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3472
3473                         udelay(50);
3474
3475                         tmp &= ~grbm_soft_reset;
3476                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3477                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3478                 }
3479
3480                 /* Wait a little for things to settle down */
3481                 udelay(50);
3482         }
3483         return 0;
3484 }
3485
3486 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3487 {
3488         uint64_t clock;
3489
3490         mutex_lock(&adev->gfx.gpu_clock_mutex);
3491         WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
3492         clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
3493                 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3494         mutex_unlock(&adev->gfx.gpu_clock_mutex);
3495         return clock;
3496 }
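/*
 * Reading note for the function above: writing 1 to
 * RLC_CAPTURE_GPU_CLOCK_COUNT appears to latch the free-running 64-bit GPU
 * clock counter into the LSB/MSB register pair, so the two halves read back
 * under gpu_clock_mutex form one coherent sample instead of straddling a
 * rollover.
 */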
3497
3498 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3499                                           uint32_t vmid,
3500                                           uint32_t gds_base, uint32_t gds_size,
3501                                           uint32_t gws_base, uint32_t gws_size,
3502                                           uint32_t oa_base, uint32_t oa_size)
3503 {
3504         struct amdgpu_device *adev = ring->adev;
3505
3506         /* GDS Base */
3507         gfx_v9_0_write_data_to_reg(ring, 0, false,
3508                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
3509                                    gds_base);
3510
3511         /* GDS Size */
3512         gfx_v9_0_write_data_to_reg(ring, 0, false,
3513                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
3514                                    gds_size);
3515
3516         /* GWS */
3517         gfx_v9_0_write_data_to_reg(ring, 0, false,
3518                                    SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
3519                                    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
3520
3521         /* OA */
3522         gfx_v9_0_write_data_to_reg(ring, 0, false,
3523                                    SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
3524                                    (1 << (oa_size + oa_base)) - (1 << oa_base));
3525 }
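/*
 * The OA expression above builds a contiguous mask of oa_size bits starting
 * at bit oa_base.  For example, oa_base = 2 and oa_size = 3 give
 * (1 << 5) - (1 << 2) = 0b11100, i.e. bits 2..4 set.
 */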
3526
3527 static const u32 vgpr_init_compute_shader[] =
3528 {
3529         0xb07c0000, 0xbe8000ff,
3530         0x000000f8, 0xbf110800,
3531         0x7e000280, 0x7e020280,
3532         0x7e040280, 0x7e060280,
3533         0x7e080280, 0x7e0a0280,
3534         0x7e0c0280, 0x7e0e0280,
3535         0x80808800, 0xbe803200,
3536         0xbf84fff5, 0xbf9c0000,
3537         0xd28c0001, 0x0001007f,
3538         0xd28d0001, 0x0002027e,
3539         0x10020288, 0xb8810904,
3540         0xb7814000, 0xd1196a01,
3541         0x00000301, 0xbe800087,
3542         0xbefc00c1, 0xd89c4000,
3543         0x00020201, 0xd89cc080,
3544         0x00040401, 0x320202ff,
3545         0x00000800, 0x80808100,
3546         0xbf84fff8, 0x7e020280,
3547         0xbf810000, 0x00000000,
3548 };
3549
3550 static const u32 sgpr_init_compute_shader[] =
3551 {
3552         0xb07c0000, 0xbe8000ff,
3553         0x0000005f, 0xbee50080,
3554         0xbe812c65, 0xbe822c65,
3555         0xbe832c65, 0xbe842c65,
3556         0xbe852c65, 0xb77c0005,
3557         0x80808500, 0xbf84fff8,
3558         0xbe800080, 0xbf810000,
3559 };
3560
3561 static const struct soc15_reg_entry vgpr_init_regs[] = {
3562    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3563    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3564    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3565    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3566    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3567    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3568    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3569    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3570    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
3571    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
3572 };
3573
3574 static const struct soc15_reg_entry sgpr_init_regs[] = {
3575    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3576    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3577    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3578    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3579    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3580    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3581    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3582    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3583    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
3584    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
3585 };
3586
3587 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
3588    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
3589    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
3590    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
3591    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
3592    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
3593    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
3594    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
3595    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
3596    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
3597    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
3598    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
3599    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
3600    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
3601    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
3602    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
3603    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
3604    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
3605    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
3606    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
3607    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
3608    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
3609    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
3610    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
3611    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
3612    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
3613    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
3614    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
3615    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
3616    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
3617    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
3618    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
3619    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
3620 };
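/*
 * In each entry above, the value after the register (0) appears unused for
 * these counters, and the last two numbers appear to give how many SE and
 * instance selections to walk when reading that EDC counter (e.g. 4 and 16
 * for mmSQ_EDC_DED_CNT).  The struct fields are defined elsewhere, so this
 * reading is an assumption based on how similar tables are consumed by the
 * RAS code.
 */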
3621
3622 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
3623 {
3624         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
3625         int i, r;
3626
3627         r = amdgpu_ring_alloc(ring, 7);
3628         if (r) {
3629                 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
3630                         ring->name, r);
3631                 return r;
3632         }
3633
3634         WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
3635         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
3636
3637         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3638         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
3639                                 PACKET3_DMA_DATA_DST_SEL(1) |
3640                                 PACKET3_DMA_DATA_SRC_SEL(2) |
3641                                 PACKET3_DMA_DATA_ENGINE(0)));
3642         amdgpu_ring_write(ring, 0);
3643         amdgpu_ring_write(ring, 0);
3644         amdgpu_ring_write(ring, 0);
3645         amdgpu_ring_write(ring, 0);
3646         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
3647                                 adev->gds.gds_size);
3648
3649         amdgpu_ring_commit(ring);
3650
3651         for (i = 0; i < adev->usec_timeout; i++) {
3652                 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
3653                         break;
3654                 udelay(1);
3655         }
3656
3657         if (i >= adev->usec_timeout)
3658                 r = -ETIMEDOUT;
3659
3660         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
3661
3662         return r;
3663 }
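/*
 * Intent of the helper above, as far as the packet stream shows: GDS VMID0 is
 * opened to cover the whole GDS, a DMA_DATA packet with DST_SEL(1) (GDS) and
 * SRC_SEL(2) (immediate data) fills that range with the zero dword carried in
 * the packet, and the loop polls until rptr catches wptr so the fill has
 * completed before the VMID0 window is shrunk back to zero.  This presumably
 * seeds the GDS EDC state with known-good parity, in line with the function
 * name.
 */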
3664
3665 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
3666 {
3667         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
3668         struct amdgpu_ib ib;
3669         struct dma_fence *f = NULL;
3670         int r, i, j, k;
3671         unsigned total_size, vgpr_offset, sgpr_offset;
3672         u64 gpu_addr;
3673
3674         /* only support when RAS is enabled */
3675         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
3676                 return 0;
3677
3678         /* bail if the compute ring is not ready */
3679         if (!ring->sched.ready)
3680                 return 0;
3681
3682         total_size =
3683                 ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3684         total_size +=
3685                 ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3686         total_size = ALIGN(total_size, 256);
3687         vgpr_offset = total_size;
3688         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
3689         sgpr_offset = total_size;
3690         total_size += sizeof(sgpr_init_compute_shader);
3691
3692         /* allocate an indirect buffer to put the commands in */
3693         memset(&ib, 0, sizeof(ib));
3694         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
3695         if (r) {
3696                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
3697                 return r;
3698         }
3699
3700         /* load the compute shaders */
3701         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
3702                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
3703
3704         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
3705                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
3706
3707         /* init the ib length to 0 */
3708         ib.length_dw = 0;
3709
3710         /* VGPR */
3711         /* write the register state for the compute dispatch */
3712         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
3713                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3714                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
3715                                                                 - PACKET3_SET_SH_REG_START;
3716                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
3717         }
3718         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3719         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
3720         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3721         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3722                                                         - PACKET3_SET_SH_REG_START;
3723         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3724         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3725
3726         /* write dispatch packet */
3727         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3728         ib.ptr[ib.length_dw++] = 128; /* x */
3729         ib.ptr[ib.length_dw++] = 1; /* y */
3730         ib.ptr[ib.length_dw++] = 1; /* z */
3731         ib.ptr[ib.length_dw++] =
3732                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3733
3734         /* write CS partial flush packet */
3735         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3736         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3737
3738         /* SGPR */
3739         /* write the register state for the compute dispatch */
3740         for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
3741                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3742                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
3743                                                                 - PACKET3_SET_SH_REG_START;
3744                 ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
3745         }
3746         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3747         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
3748         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3749         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3750                                                         - PACKET3_SET_SH_REG_START;
3751         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3752         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3753
3754         /* write dispatch packet */
3755         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3756         ib.ptr[ib.length_dw++] = 128; /* x */
3757         ib.ptr[ib.length_dw++] = 1; /* y */
3758         ib.ptr[ib.length_dw++] = 1; /* z */
3759         ib.ptr[ib.length_dw++] =
3760                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3761
3762         /* write CS partial flush packet */
3763         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3764         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3765
3766         /* schedule the IB on the ring */
3767         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
3768         if (r) {
3769                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
3770                 goto fail;
3771         }
3772
3773         /* wait for the GPU to finish processing the IB */
3774         r = dma_fence_wait(f, false);
3775         if (r) {
3776                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
3777                 goto fail;
3778         }
3779
3780         /* read back registers to clear the counters */
3781         mutex_lock(&adev->grbm_idx_mutex);
3782         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
3783                 for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
3784                         for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
3785                                 gfx_v9_0_select_se_sh(adev, j, 0x0, k);
3786                                 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3787                         }
3788                 }
3789         }
3790         WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
3791         mutex_unlock(&adev->grbm_idx_mutex);
3792
3793 fail:
3794         amdgpu_ib_free(adev, &ib, NULL);
3795         dma_fence_put(f);
3796
3797         return r;
3798 }
3799
3800 static int gfx_v9_0_early_init(void *handle)
3801 {
3802         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3803
3804         adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
3805         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
3806         gfx_v9_0_set_ring_funcs(adev);
3807         gfx_v9_0_set_irq_funcs(adev);
3808         gfx_v9_0_set_gds_init(adev);
3809         gfx_v9_0_set_rlc_funcs(adev);
3810
3811         return 0;
3812 }
3813
3814 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
3815                 struct amdgpu_iv_entry *entry);
3816
3817 static int gfx_v9_0_ecc_late_init(void *handle)
3818 {
3819         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3820         struct ras_common_if **ras_if = &adev->gfx.ras_if;
3821         struct ras_ih_if ih_info = {
3822                 .cb = gfx_v9_0_process_ras_data_cb,
3823         };
3824         struct ras_fs_if fs_info = {
3825                 .sysfs_name = "gfx_err_count",
3826                 .debugfs_name = "gfx_err_inject",
3827         };
3828         struct ras_common_if ras_block = {
3829                 .block = AMDGPU_RAS_BLOCK__GFX,
3830                 .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
3831                 .sub_block_index = 0,
3832                 .name = "gfx",
3833         };
3834         int r;
3835
3836         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
3837                 amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
3838                 return 0;
3839         }
3840
3841         r = gfx_v9_0_do_edc_gds_workarounds(adev);
3842         if (r)
3843                 return r;
3844
3845         /* requires IBs so do in late init after IB pool is initialized */
3846         r = gfx_v9_0_do_edc_gpr_workarounds(adev);
3847         if (r)
3848                 return r;
3849
3850         /* handle resume path. */
3851         if (*ras_if) {
3852                 /* re-send the RAS TA enable cmd during resume,
3853                  * and be prepared to handle failure.
3854                  */
3855                 ih_info.head = **ras_if;
3856                 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
3857                 if (r) {
3858                         if (r == -EAGAIN) {
3859                                 /* request a GPU reset; this will run again. */
3860                                 amdgpu_ras_request_reset_on_boot(adev,
3861                                                 AMDGPU_RAS_BLOCK__GFX);
3862                                 return 0;
3863                         }
3864                         /* failed to enable RAS, clean everything up. */
3865                         goto irq;
3866                 }
3867                 /* enabled successfully, continue. */
3868                 goto resume;
3869         }
3870
3871         *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
3872         if (!*ras_if)
3873                 return -ENOMEM;
3874
3875         **ras_if = ras_block;
3876
3877         r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
3878         if (r) {
3879                 if (r == -EAGAIN) {
3880                         amdgpu_ras_request_reset_on_boot(adev,
3881                                         AMDGPU_RAS_BLOCK__GFX);
3882                         r = 0;
3883                 }
3884                 goto feature;
3885         }
3886
3887         ih_info.head = **ras_if;
3888         fs_info.head = **ras_if;
3889
3890         r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
3891         if (r)
3892                 goto interrupt;
3893
3894         amdgpu_ras_debugfs_create(adev, &fs_info);
3895
3896         r = amdgpu_ras_sysfs_create(adev, &fs_info);
3897         if (r)
3898                 goto sysfs;
3899 resume:
3900         r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
3901         if (r)
3902                 goto irq;
3903
3904         return 0;
3905 irq:
3906         amdgpu_ras_sysfs_remove(adev, *ras_if);
3907 sysfs:
3908         amdgpu_ras_debugfs_remove(adev, *ras_if);
3909         amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
3910 interrupt:
3911         amdgpu_ras_feature_enable(adev, *ras_if, 0);
3912 feature:
3913         kfree(*ras_if);
3914         *ras_if = NULL;
3915         return r;
3916 }
3917
3918 static int gfx_v9_0_late_init(void *handle)
3919 {
3920         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3921         int r;
3922
3923         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
3924         if (r)
3925                 return r;
3926
3927         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
3928         if (r)
3929                 return r;
3930
3931         r = gfx_v9_0_ecc_late_init(handle);
3932         if (r)
3933                 return r;
3934
3935         return 0;
3936 }
3937
3938 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
3939 {
3940         uint32_t rlc_setting;
3941
3942         /* if RLC is not enabled, do nothing */
3943         rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
3944         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
3945                 return false;
3946
3947         return true;
3948 }
3949
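/*
 * Editorial note (added for this review; not part of the upstream source):
 * entering safe mode writes the CMD bit plus a MESSAGE value of 1 to
 * mmRLC_SAFE_MODE and then polls (up to adev->usec_timeout microseconds)
 * until the RLC clears the CMD field, presumably acknowledging the request;
 * leaving safe mode writes CMD alone and does not wait for an
 * acknowledgement.
 */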
3950 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
3951 {
3952         uint32_t data;
3953         unsigned i;
3954
3955         data = RLC_SAFE_MODE__CMD_MASK;
3956         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
3957         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3958
3959         /* wait for RLC_SAFE_MODE */
3960         for (i = 0; i < adev->usec_timeout; i++) {
3961                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
3962                         break;
3963                 udelay(1);
3964         }
3965 }
3966
3967 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
3968 {
3969         uint32_t data;
3970
3971         data = RLC_SAFE_MODE__CMD_MASK;
3972         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3973 }
3974
3975 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
3976                                                 bool enable)
3977 {
3978         amdgpu_gfx_rlc_enter_safe_mode(adev);
3979
3980         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
3981                 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
3982                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
3983                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
3984         } else {
3985                 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
3986                 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
3987         }
3988
3989         amdgpu_gfx_rlc_exit_safe_mode(adev);
3990 }
3991
3992 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
3993                                                 bool enable)
3994 {
3995         /* TODO: double check if we need to perform under safe mode */
3996         /* gfx_v9_0_enter_rlc_safe_mode(adev); */
3997
3998         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
3999                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4000         else
4001                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4002
4003         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4004                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4005         else
4006                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4007
4008         /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4009 }
4010
4011 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4012                                                       bool enable)
4013 {
4014         uint32_t data, def;
4015
4016         amdgpu_gfx_rlc_enter_safe_mode(adev);
4017
4018         /* It is disabled by HW by default */
4019         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4020                 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4021                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4022
4023                 if (adev->asic_type != CHIP_VEGA12)
4024                         data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4025
4026                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4027                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4028                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4029
4030                 /* only for Vega10 & Raven1 */
4031                 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4032
4033                 if (def != data)
4034                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4035
4036                 /* MGLS is a global flag to control all MGLS in GFX */
4037                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4038                         /* 2 - RLC memory Light sleep */
4039                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4040                                 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4041                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4042                                 if (def != data)
4043                                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4044                         }
4045                         /* 3 - CP memory Light sleep */
4046                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4047                                 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4048                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4049                                 if (def != data)
4050                                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4051                         }
4052                 }
4053         } else {
4054                 /* 1 - MGCG_OVERRIDE */
4055                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4056
4057                 if (adev->asic_type != CHIP_VEGA12)
4058                         data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4059
4060                 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4061                          RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4062                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4063                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4064
4065                 if (def != data)
4066                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4067
4068                 /* 2 - disable MGLS in RLC */
4069                 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4070                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4071                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4072                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4073                 }
4074
4075                 /* 3 - disable MGLS in CP */
4076                 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4077                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4078                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4079                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4080                 }
4081         }
4082
4083         amdgpu_gfx_rlc_exit_safe_mode(adev);
4084 }
4085
4086 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4087                                            bool enable)
4088 {
4089         uint32_t data, def;
4090
4091         amdgpu_gfx_rlc_enter_safe_mode(adev);
4092
4093         /* Enable 3D CGCG/CGLS */
4094         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4095                 /* write cmd to clear cgcg/cgls ov */
4096                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4097                 /* unset CGCG override */
4098                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4099                 /* update CGCG and CGLS override bits */
4100                 if (def != data)
4101                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4102
4103                 /* enable 3Dcgcg FSM(0x0000363f) */
4104                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4105
4106                 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4107                         RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4108                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4109                         data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4110                                 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4111                 if (def != data)
4112                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4113
4114                 /* set IDLE_POLL_COUNT(0x00900100) */
4115                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4116                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4117                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4118                 if (def != data)
4119                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4120         } else {
4121                 /* Disable 3D CGCG/CGLS */
4122                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4123                 /* disable cgcg, cgls should be disabled */
4124                 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4125                           RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4126                 /* disable cgcg and cgls in FSM */
4127                 if (def != data)
4128                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4129         }
4130
4131         amdgpu_gfx_rlc_exit_safe_mode(adev);
4132 }
4133
4134 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4135                                                       bool enable)
4136 {
4137         uint32_t def, data;
4138
4139         amdgpu_gfx_rlc_enter_safe_mode(adev);
4140
4141         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4142                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4143                 /* unset CGCG override */
4144                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4145                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4146                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4147                 else
4148                         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4149                 /* update CGCG and CGLS override bits */
4150                 if (def != data)
4151                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4152
4153                 /* enable cgcg FSM(0x0000363F) */
4154                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4155
4156                 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4157                         RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4158                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4159                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4160                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4161                 if (def != data)
4162                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4163
4164                 /* set IDLE_POLL_COUNT(0x00900100) */
4165                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4166                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4167                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4168                 if (def != data)
4169                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4170         } else {
4171                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4172                 /* reset CGCG/CGLS bits */
4173                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4174                 /* disable cgcg and cgls in FSM */
4175                 if (def != data)
4176                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4177         }
4178
4179         amdgpu_gfx_rlc_exit_safe_mode(adev);
4180 }
4181
4182 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4183                                             bool enable)
4184 {
4185         if (enable) {
4186                 /* CGCG/CGLS should be enabled after MGCG/MGLS
4187                  * ===  MGCG + MGLS ===
4188                  */
4189                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4190                 /* ===  CGCG/CGLS for GFX 3D Only === */
4191                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4192                 /* ===  CGCG + CGLS === */
4193                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4194         } else {
4195                 /* CGCG/CGLS should be disabled before MGCG/MGLS
4196                  * ===  CGCG + CGLS ===
4197                  */
4198                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4199                 /* ===  CGCG/CGLS for GFX 3D Only === */
4200                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4201                 /* ===  MGCG + MGLS === */
4202                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4203         }
4204         return 0;
4205 }
4206
4207 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4208         .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4209         .set_safe_mode = gfx_v9_0_set_safe_mode,
4210         .unset_safe_mode = gfx_v9_0_unset_safe_mode,
4211         .init = gfx_v9_0_rlc_init,
4212         .get_csb_size = gfx_v9_0_get_csb_size,
4213         .get_csb_buffer = gfx_v9_0_get_csb_buffer,
4214         .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4215         .resume = gfx_v9_0_rlc_resume,
4216         .stop = gfx_v9_0_rlc_stop,
4217         .reset = gfx_v9_0_rlc_reset,
4218         .start = gfx_v9_0_rlc_start
4219 };
4220
4221 static int gfx_v9_0_set_powergating_state(void *handle,
4222                                           enum amd_powergating_state state)
4223 {
4224         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4225         bool enable = (state == AMD_PG_STATE_GATE);
4226
4227         switch (adev->asic_type) {
4228         case CHIP_RAVEN:
4229                 if (!enable) {
4230                         amdgpu_gfx_off_ctrl(adev, false);
4231                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4232                 }
4233                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4234                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4235                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4236                 } else {
4237                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4238                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4239                 }
4240
4241                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4242                         gfx_v9_0_enable_cp_power_gating(adev, true);
4243                 else
4244                         gfx_v9_0_enable_cp_power_gating(adev, false);
4245
4246                 /* update gfx cgpg state */
4247                 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4248
4249                 /* update mgcg state */
4250                 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4251
4252                 if (enable)
4253                         amdgpu_gfx_off_ctrl(adev, true);
4254                 break;
4255         case CHIP_VEGA12:
4256                 if (!enable) {
4257                         amdgpu_gfx_off_ctrl(adev, false);
4258                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4259                 } else {
4260                         amdgpu_gfx_off_ctrl(adev, true);
4261                 }
4262                 break;
4263         default:
4264                 break;
4265         }
4266
4267         return 0;
4268 }
4269
4270 static int gfx_v9_0_set_clockgating_state(void *handle,
4271                                           enum amd_clockgating_state state)
4272 {
4273         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4274
4275         if (amdgpu_sriov_vf(adev))
4276                 return 0;
4277
4278         switch (adev->asic_type) {
4279         case CHIP_VEGA10:
4280         case CHIP_VEGA12:
4281         case CHIP_VEGA20:
4282         case CHIP_RAVEN:
4283                 gfx_v9_0_update_gfx_clock_gating(adev,
4284                                                  state == AMD_CG_STATE_GATE);
4285                 break;
4286         default:
4287                 break;
4288         }
4289         return 0;
4290 }
4291
4292 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4293 {
4294         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4295         int data;
4296
4297         if (amdgpu_sriov_vf(adev))
4298                 *flags = 0;
4299
4300         /* AMD_CG_SUPPORT_GFX_MGCG */
4301         data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4302         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4303                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
4304
4305         /* AMD_CG_SUPPORT_GFX_CGCG */
4306         data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4307         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4308                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
4309
4310         /* AMD_CG_SUPPORT_GFX_CGLS */
4311         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4312                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
4313
4314         /* AMD_CG_SUPPORT_GFX_RLC_LS */
4315         data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4316         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4317                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4318
4319         /* AMD_CG_SUPPORT_GFX_CP_LS */
4320         data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4321         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4322                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4323
4324         /* AMD_CG_SUPPORT_GFX_3D_CGCG */
4325         data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4326         if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4327                 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4328
4329         /* AMD_CG_SUPPORT_GFX_3D_CGLS */
4330         if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4331                 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4332 }
4333
4334 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4335 {
4336         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4337 }
4338
4339 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4340 {
4341         struct amdgpu_device *adev = ring->adev;
4342         u64 wptr;
4343
4344         /* XXX check if swapping is necessary on BE */
4345         if (ring->use_doorbell) {
4346                 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4347         } else {
4348                 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4349                 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4350         }
4351
4352         return wptr;
4353 }
4354
4355 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4356 {
4357         struct amdgpu_device *adev = ring->adev;
4358
4359         if (ring->use_doorbell) {
4360                 /* XXX check if swapping is necessary on BE */
4361                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4362                 WDOORBELL64(ring->doorbell_index, ring->wptr);
4363         } else {
4364                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4365                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4366         }
4367 }
4368
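/*
 * Editorial note (added for this review; not part of the upstream source):
 * the HDP flush below is emitted through gfx_v9_0_wait_reg_mem() against the
 * NBIO flush request/done register pair with a per-client reference mask:
 * compute rings on MEC1/MEC2 shift the cp2/cp6 masks by their pipe index and
 * use the ME engine, while gfx rings use the cp0 mask on the PFP engine.
 */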
4369 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4370 {
4371         struct amdgpu_device *adev = ring->adev;
4372         u32 ref_and_mask, reg_mem_engine;
4373         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
4374
4375         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4376                 switch (ring->me) {
4377                 case 1:
4378                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4379                         break;
4380                 case 2:
4381                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4382                         break;
4383                 default:
4384                         return;
4385                 }
4386                 reg_mem_engine = 0;
4387         } else {
4388                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4389                 reg_mem_engine = 1; /* pfp */
4390         }
4391
4392         gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4393                               adev->nbio_funcs->get_hdp_flush_req_offset(adev),
4394                               adev->nbio_funcs->get_hdp_flush_done_offset(adev),
4395                               ref_and_mask, ref_and_mask, 0x20);
4396 }
4397
4398 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4399                                         struct amdgpu_job *job,
4400                                         struct amdgpu_ib *ib,
4401                                         uint32_t flags)
4402 {
4403         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4404         u32 header, control = 0;
4405
4406         if (ib->flags & AMDGPU_IB_FLAG_CE)
4407                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4408         else
4409                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4410
4411         control |= ib->length_dw | (vmid << 24);
4412
4413         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4414                 control |= INDIRECT_BUFFER_PRE_ENB(1);
4415
4416                 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4417                         gfx_v9_0_ring_emit_de_meta(ring);
4418         }
4419
4420         amdgpu_ring_write(ring, header);
4421         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4422         amdgpu_ring_write(ring,
4423 #ifdef __BIG_ENDIAN
4424                 (2 << 0) |
4425 #endif
4426                 lower_32_bits(ib->gpu_addr));
4427         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4428         amdgpu_ring_write(ring, control);
4429 }
4430
4431 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4432                                           struct amdgpu_job *job,
4433                                           struct amdgpu_ib *ib,
4434                                           uint32_t flags)
4435 {
4436         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4437         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4438
4439         /* Currently, there is a high likelihood of a wave ID mismatch
4440          * between ME and GDS, leading to a hw deadlock, because ME generates
4441          * different wave IDs than the GDS expects. This situation happens
4442          * randomly when at least 5 compute pipes use GDS ordered append.
4443          * The wave IDs generated by ME are also wrong after suspend/resume.
4444          * Those are probably bugs somewhere else in the kernel driver.
4445          *
4446          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4447          * GDS to 0 for this ring (me/pipe).
4448          */
4449         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4450                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4451                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4452                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4453         }
4454
4455         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4456         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4457         amdgpu_ring_write(ring,
4458 #ifdef __BIG_ENDIAN
4459                                 (2 << 0) |
4460 #endif
4461                                 lower_32_bits(ib->gpu_addr));
4462         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4463         amdgpu_ring_write(ring, control);
4464 }
4465
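/*
 * Editorial note (added for this review; not part of the upstream source):
 * the fence below is a single RELEASE_MEM packet, header plus seven dwords:
 * the cache-action flags (a lighter TC writeback-only set when
 * AMDGPU_FENCE_FLAG_TC_WB_ONLY is passed), the DATA_SEL/INT_SEL dword, the
 * 64-bit destination address, the 64-bit sequence value, and a trailing
 * zero dword.
 */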
4466 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4467                                      u64 seq, unsigned flags)
4468 {
4469         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4470         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4471         bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
4472
4473         /* RELEASE_MEM - flush caches, send int */
4474         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
4475         amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
4476                                                EOP_TC_NC_ACTION_EN) :
4477                                               (EOP_TCL1_ACTION_EN |
4478                                                EOP_TC_ACTION_EN |
4479                                                EOP_TC_WB_ACTION_EN |
4480                                                EOP_TC_MD_ACTION_EN)) |
4481                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4482                                  EVENT_INDEX(5)));
4483         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4484
4485         /*
4486          * the address must be Qword aligned for a 64-bit write, and Dword
4487          * aligned when only the low 32 bits are written (high bits discarded)
4488          */
4489         if (write64bit)
4490                 BUG_ON(addr & 0x7);
4491         else
4492                 BUG_ON(addr & 0x3);
4493         amdgpu_ring_write(ring, lower_32_bits(addr));
4494         amdgpu_ring_write(ring, upper_32_bits(addr));
4495         amdgpu_ring_write(ring, lower_32_bits(seq));
4496         amdgpu_ring_write(ring, upper_32_bits(seq));
4497         amdgpu_ring_write(ring, 0);
4498 }
4499
4500 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4501 {
4502         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4503         uint32_t seq = ring->fence_drv.sync_seq;
4504         uint64_t addr = ring->fence_drv.gpu_addr;
4505
4506         gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
4507                               lower_32_bits(addr), upper_32_bits(addr),
4508                               seq, 0xffffffff, 4);
4509 }
4510
4511 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4512                                         unsigned vmid, uint64_t pd_addr)
4513 {
4514         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
4515
4516         /* compute doesn't have PFP */
4517         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
4518                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4519                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4520                 amdgpu_ring_write(ring, 0x0);
4521         }
4522 }
4523
4524 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4525 {
4526         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
4527 }
4528
4529 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4530 {
4531         u64 wptr;
4532
4533         /* XXX check if swapping is necessary on BE */
4534         if (ring->use_doorbell)
4535                 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
4536         else
4537                 BUG();
4538         return wptr;
4539 }
4540
4541 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
4542                                            bool acquire)
4543 {
4544         struct amdgpu_device *adev = ring->adev;
4545         int pipe_num, tmp, reg;
4546         int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
4547
4548         pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
4549
4550         /* first me only has 2 entries, GFX and HP3D */
4551         if (ring->me > 0)
4552                 pipe_num -= 2;
4553
4554         reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
4555         tmp = RREG32(reg);
4556         tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
4557         WREG32(reg, tmp);
4558 }
4559
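/*
 * Editorial note (added for this review; not part of the upstream source):
 * pipe reservation below tracks per-pipe bits in gfx.pipe_reserve_bitmap
 * under pipe_reserve_mutex. While any reservation is held, every gfx and
 * compute pipe without a bit set has its SPI_WCL_PIPE_PERCENT value dropped
 * to the 0x1 floor via gfx_v9_0_ring_set_pipe_percent(); once the bitmap is
 * empty again, all pipes are restored to the full VALUE mask.
 */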
4560 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
4561                                             struct amdgpu_ring *ring,
4562                                             bool acquire)
4563 {
4564         int i, pipe;
4565         bool reserve;
4566         struct amdgpu_ring *iring;
4567
4568         mutex_lock(&adev->gfx.pipe_reserve_mutex);
4569         pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
4570         if (acquire)
4571                 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4572         else
4573                 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4574
4575         if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
4576                 /* Clear all reservations - everyone reacquires all resources */
4577                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
4578                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
4579                                                        true);
4580
4581                 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
4582                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
4583                                                        true);
4584         } else {
4585                 /* Lower all pipes without a current reservation */
4586                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
4587                         iring = &adev->gfx.gfx_ring[i];
4588                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4589                                                            iring->me,
4590                                                            iring->pipe,
4591                                                            0);
4592                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4593                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4594                 }
4595
4596                 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
4597                         iring = &adev->gfx.compute_ring[i];
4598                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4599                                                            iring->me,
4600                                                            iring->pipe,
4601                                                            0);
4602                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4603                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4604                 }
4605         }
4606
4607         mutex_unlock(&adev->gfx.pipe_reserve_mutex);
4608 }
4609
4610 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
4611                                       struct amdgpu_ring *ring,
4612                                       bool acquire)
4613 {
4614         uint32_t pipe_priority = acquire ? 0x2 : 0x0;
4615         uint32_t queue_priority = acquire ? 0xf : 0x0;
4616
4617         mutex_lock(&adev->srbm_mutex);
4618         soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4619
4620         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
4621         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
4622
4623         soc15_grbm_select(adev, 0, 0, 0, 0);
4624         mutex_unlock(&adev->srbm_mutex);
4625 }
4626
4627 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
4628                                                enum drm_sched_priority priority)
4629 {
4630         struct amdgpu_device *adev = ring->adev;
4631         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
4632
4633         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
4634                 return;
4635
4636         gfx_v9_0_hqd_set_priority(adev, ring, acquire);
4637         gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
4638 }
4639
4640 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4641 {
4642         struct amdgpu_device *adev = ring->adev;
4643
4644         /* XXX check if swapping is necessary on BE */
4645         if (ring->use_doorbell) {
4646                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4647                 WDOORBELL64(ring->doorbell_index, ring->wptr);
4648         } else {
4649                 BUG(); /* only DOORBELL method supported on gfx9 now */
4650         }
4651 }
4652
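/*
 * Editorial note (added for this review; not part of the upstream source):
 * the KIQ fence only carries a 32-bit sequence number (the BUG_ON rejects
 * the 64-bit flag). It is emitted as a WRITE_DATA of the low sequence dword
 * to memory, optionally followed by a second WRITE_DATA that pokes
 * CPC_INT_STATUS to raise the interrupt (src_id 178) when
 * AMDGPU_FENCE_FLAG_INT is set.
 */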
4653 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
4654                                          u64 seq, unsigned int flags)
4655 {
4656         struct amdgpu_device *adev = ring->adev;
4657
4658         /* we only allocate 32bit for each seq wb address */
4659         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
4660
4661         /* write fence seq to the "addr" */
4662         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4663         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4664                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
4665         amdgpu_ring_write(ring, lower_32_bits(addr));
4666         amdgpu_ring_write(ring, upper_32_bits(addr));
4667         amdgpu_ring_write(ring, lower_32_bits(seq));
4668
4669         if (flags & AMDGPU_FENCE_FLAG_INT) {
4670                 /* set register to trigger INT */
4671                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4672                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4673                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
4674                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
4675                 amdgpu_ring_write(ring, 0);
4676                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
4677         }
4678 }
4679
4680 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
4681 {
4682         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4683         amdgpu_ring_write(ring, 0);
4684 }
4685
4686 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
4687 {
4688         struct v9_ce_ib_state ce_payload = {0};
4689         uint64_t csa_addr;
4690         int cnt;
4691
4692         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
4693         csa_addr = amdgpu_csa_vaddr(ring->adev);
4694
4695         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4696         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
4697                                  WRITE_DATA_DST_SEL(8) |
4698                                  WR_CONFIRM) |
4699                                  WRITE_DATA_CACHE_POLICY(0));
4700         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4701         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4702         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
4703 }
4704
4705 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
4706 {
4707         struct v9_de_ib_state de_payload = {0};
4708         uint64_t csa_addr, gds_addr;
4709         int cnt;
4710
4711         csa_addr = amdgpu_csa_vaddr(ring->adev);
4712         gds_addr = csa_addr + 4096;
4713         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
4714         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
4715
4716         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
4717         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4718         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
4719                                  WRITE_DATA_DST_SEL(8) |
4720                                  WR_CONFIRM) |
4721                                  WRITE_DATA_CACHE_POLICY(0));
4722         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4723         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4724         amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
4725 }
4726
4727 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
4728 {
4729         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
4730         amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
4731 }
4732
4733 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
4734 {
4735         uint32_t dw2 = 0;
4736
4737         if (amdgpu_sriov_vf(ring->adev))
4738                 gfx_v9_0_ring_emit_ce_meta(ring);
4739
4740         gfx_v9_0_ring_emit_tmz(ring, true);
4741
4742         dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
4743         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
4744                 /* set load_global_config & load_global_uconfig */
4745                 dw2 |= 0x8001;
4746                 /* set load_cs_sh_regs */
4747                 dw2 |= 0x01000000;
4748                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
4749                 dw2 |= 0x10002;
4750
4751                 /* set load_ce_ram if preamble presented */
4752                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
4753                         dw2 |= 0x10000000;
4754         } else {
4755                 /* still load_ce_ram if this is the first time the preamble is
4756                  * presented, even though no context switch happens.
4757                  */
4758                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
4759                         dw2 |= 0x10000000;
4760         }
4761
4762         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4763         amdgpu_ring_write(ring, dw2);
4764         amdgpu_ring_write(ring, 0);
4765 }
4766
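/*
 * Editorial note (added for this review; not part of the upstream source):
 * conditional execution is handled as a pair. emit_init_cond_exec() writes a
 * COND_EXEC packet whose final dword is the placeholder 0x55aa55aa and
 * returns that dword's offset in the ring; emit_patch_cond_exec() later
 * overwrites the placeholder with the number of dwords between it and the
 * current write pointer (accounting for ring wrap-around), so the CP can
 * skip that span when *cond_exe_gpu_addr reads back as zero.
 */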
4767 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
4768 {
4769         unsigned ret;
4770         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
4771         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
4772         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
4773         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
4774         ret = ring->wptr & ring->buf_mask;
4775         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
4776         return ret;
4777 }
4778
4779 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
4780 {
4781         unsigned cur;
4782         BUG_ON(offset > ring->buf_mask);
4783         BUG_ON(ring->ring[offset] != 0x55aa55aa);
4784
4785         cur = (ring->wptr & ring->buf_mask) - 1;
4786         if (likely(cur > offset))
4787                 ring->ring[offset] = cur - offset;
4788         else
4789                 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
4790 }
4791
4792 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
4793 {
4794         struct amdgpu_device *adev = ring->adev;
4795
4796         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4797         amdgpu_ring_write(ring, 0 |     /* src: register*/
4798                                 (5 << 8) |      /* dst: memory */
4799                                 (1 << 20));     /* write confirm */
4800         amdgpu_ring_write(ring, reg);
4801         amdgpu_ring_write(ring, 0);
4802         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4803                                 adev->virt.reg_val_offs * 4));
4804         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4805                                 adev->virt.reg_val_offs * 4));
4806 }
4807
4808 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
4809                                     uint32_t val)
4810 {
4811         uint32_t cmd = 0;
4812
4813         switch (ring->funcs->type) {
4814         case AMDGPU_RING_TYPE_GFX:
4815                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
4816                 break;
4817         case AMDGPU_RING_TYPE_KIQ:
4818                 cmd = (1 << 16); /* no inc addr */
4819                 break;
4820         default:
4821                 cmd = WR_CONFIRM;
4822                 break;
4823         }
4824         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4825         amdgpu_ring_write(ring, cmd);
4826         amdgpu_ring_write(ring, reg);
4827         amdgpu_ring_write(ring, 0);
4828         amdgpu_ring_write(ring, val);
4829 }
4830
4831 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
4832                                         uint32_t val, uint32_t mask)
4833 {
4834         gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
4835 }
4836
4837 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
4838                                                   uint32_t reg0, uint32_t reg1,
4839                                                   uint32_t ref, uint32_t mask)
4840 {
4841         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4842         struct amdgpu_device *adev = ring->adev;
4843         bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
4844                 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
4845
4846         if (fw_version_ok)
4847                 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
4848                                       ref, mask, 0x20);
4849         else
4850                 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
4851                                                            ref, mask);
4852 }
4853
4854 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
4855 {
4856         struct amdgpu_device *adev = ring->adev;
4857         uint32_t value = 0;
4858
4859         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
4860         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
4861         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
4862         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
4863         WREG32(mmSQ_CMD, value);
4864 }
4865
4866 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4867                                                  enum amdgpu_interrupt_state state)
4868 {
4869         switch (state) {
4870         case AMDGPU_IRQ_STATE_DISABLE:
4871         case AMDGPU_IRQ_STATE_ENABLE:
4872                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4873                                TIME_STAMP_INT_ENABLE,
4874                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4875                 break;
4876         default:
4877                 break;
4878         }
4879 }
4880
4881 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4882                                                      int me, int pipe,
4883                                                      enum amdgpu_interrupt_state state)
4884 {
4885         u32 mec_int_cntl, mec_int_cntl_reg;
4886
4887         /*
4888          * amdgpu controls only the first MEC. That's why this function only
4889          * handles the setting of interrupts for this specific MEC. All other
4890          * pipes' interrupts are set by amdkfd.
4891          */
4892
4893         if (me == 1) {
4894                 switch (pipe) {
4895                 case 0:
4896                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
4897                         break;
4898                 case 1:
4899                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
4900                         break;
4901                 case 2:
4902                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
4903                         break;
4904                 case 3:
4905                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
4906                         break;
4907                 default:
4908                         DRM_DEBUG("invalid pipe %d\n", pipe);
4909                         return;
4910                 }
4911         } else {
4912                 DRM_DEBUG("invalid me %d\n", me);
4913                 return;
4914         }
4915
4916         switch (state) {
4917         case AMDGPU_IRQ_STATE_DISABLE:
4918                 mec_int_cntl = RREG32(mec_int_cntl_reg);
4919                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4920                                              TIME_STAMP_INT_ENABLE, 0);
4921                 WREG32(mec_int_cntl_reg, mec_int_cntl);
4922                 break;
4923         case AMDGPU_IRQ_STATE_ENABLE:
4924                 mec_int_cntl = RREG32(mec_int_cntl_reg);
4925                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4926                                              TIME_STAMP_INT_ENABLE, 1);
4927                 WREG32(mec_int_cntl_reg, mec_int_cntl);
4928                 break;
4929         default:
4930                 break;
4931         }
4932 }
4933
4934 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4935                                              struct amdgpu_irq_src *source,
4936                                              unsigned type,
4937                                              enum amdgpu_interrupt_state state)
4938 {
4939         switch (state) {
4940         case AMDGPU_IRQ_STATE_DISABLE:
4941         case AMDGPU_IRQ_STATE_ENABLE:
4942                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4943                                PRIV_REG_INT_ENABLE,
4944                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4945                 break;
4946         default:
4947                 break;
4948         }
4949
4950         return 0;
4951 }
4952
4953 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4954                                               struct amdgpu_irq_src *source,
4955                                               unsigned type,
4956                                               enum amdgpu_interrupt_state state)
4957 {
4958         switch (state) {
4959         case AMDGPU_IRQ_STATE_DISABLE:
4960         case AMDGPU_IRQ_STATE_ENABLE:
4961                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4962                                PRIV_INSTR_INT_ENABLE,
4963                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
                break;
4964         default:
4965                 break;
4966         }
4967
4968         return 0;
4969 }
4970
4971 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)                         \
4972         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
4973                         CP_ECC_ERROR_INT_ENABLE, 1)
4974
4975 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)                        \
4976         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
4977                         CP_ECC_ERROR_INT_ENABLE, 0)
4978
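/*
 * The two helpers above paste the ME/pipe numbers into the register name,
 * e.g. (illustrative expansion):
 *
 *   ENABLE_ECC_ON_ME_PIPE(1, 0)
 *     -> WREG32_FIELD15(GC, 0, CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, 1)
 *
 * so only the MEC1 pipe 0-3 interrupt controls are toggled by the state
 * handler below.
 */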
4979 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
4980                                               struct amdgpu_irq_src *source,
4981                                               unsigned type,
4982                                               enum amdgpu_interrupt_state state)
4983 {
4984         switch (state) {
4985         case AMDGPU_IRQ_STATE_DISABLE:
4986                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4987                                 CP_ECC_ERROR_INT_ENABLE, 0);
4988                 DISABLE_ECC_ON_ME_PIPE(1, 0);
4989                 DISABLE_ECC_ON_ME_PIPE(1, 1);
4990                 DISABLE_ECC_ON_ME_PIPE(1, 2);
4991                 DISABLE_ECC_ON_ME_PIPE(1, 3);
4992                 break;
4993
4994         case AMDGPU_IRQ_STATE_ENABLE:
4995                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4996                                 CP_ECC_ERROR_INT_ENABLE, 1);
4997                 ENABLE_ECC_ON_ME_PIPE(1, 0);
4998                 ENABLE_ECC_ON_ME_PIPE(1, 1);
4999                 ENABLE_ECC_ON_ME_PIPE(1, 2);
5000                 ENABLE_ECC_ON_ME_PIPE(1, 3);
5001                 break;
5002         default:
5003                 break;
5004         }
5005
5006         return 0;
5007 }
5008
5009
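/*
 * EOP (end-of-pipe) interrupt enablement is per CP pipe:
 * AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP covers the single GFX ring, while the
 * compute interrupt types map onto MEC1/MEC2 pipes 0-3 as dispatched below.
 */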
5010 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5011                                             struct amdgpu_irq_src *src,
5012                                             unsigned type,
5013                                             enum amdgpu_interrupt_state state)
5014 {
5015         switch (type) {
5016         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5017                 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5018                 break;
5019         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5020                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5021                 break;
5022         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5023                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5024                 break;
5025         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5026                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5027                 break;
5028         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5029                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5030                 break;
5031         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5032                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5033                 break;
5034         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5035                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5036                 break;
5037         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5038                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5039                 break;
5040         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5041                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5042                 break;
5043         default:
5044                 break;
5045         }
5046         return 0;
5047 }
5048
5049 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5050                             struct amdgpu_irq_src *source,
5051                             struct amdgpu_iv_entry *entry)
5052 {
5053         int i;
5054         u8 me_id, pipe_id, queue_id;
5055         struct amdgpu_ring *ring;
5056
5057         DRM_DEBUG("IH: CP EOP\n");
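        /*
         * Decode the source queue from ring_id; the masks below assume the
         * layout: bits [1:0] = pipe, bits [3:2] = ME, bits [6:4] = queue.
         */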
5058         me_id = (entry->ring_id & 0x0c) >> 2;
5059         pipe_id = (entry->ring_id & 0x03) >> 0;
5060         queue_id = (entry->ring_id & 0x70) >> 4;
5061
5062         switch (me_id) {
5063         case 0:
5064                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5065                 break;
5066         case 1:
5067         case 2:
5068                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5069                         ring = &adev->gfx.compute_ring[i];
5070                         /* Per-queue interrupt is supported for MEC starting from VI.
5071                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
5072                          */
5073                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5074                                 amdgpu_fence_process(ring);
5075                 }
5076                 break;
5077         }
5078         return 0;
5079 }
5080
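/*
 * Common fault path for the privileged register/instruction handlers below:
 * locate the ring named by the IV entry (same ring_id decoding as the EOP
 * handler) and report a scheduler fault on it so the scheduler can start its
 * timeout/recovery handling for the offending job.
 */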
5081 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5082                            struct amdgpu_iv_entry *entry)
5083 {
5084         u8 me_id, pipe_id, queue_id;
5085         struct amdgpu_ring *ring;
5086         int i;
5087
5088         me_id = (entry->ring_id & 0x0c) >> 2;
5089         pipe_id = (entry->ring_id & 0x03) >> 0;
5090         queue_id = (entry->ring_id & 0x70) >> 4;
5091
5092         switch (me_id) {
5093         case 0:
5094                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5095                 break;
5096         case 1:
5097         case 2:
5098                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5099                         ring = &adev->gfx.compute_ring[i];
5100                         if (ring->me == me_id && ring->pipe == pipe_id &&
5101                             ring->queue == queue_id)
5102                                 drm_sched_fault(&ring->sched);
5103                 }
5104                 break;
5105         }
5106 }
5107
5108 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5109                                  struct amdgpu_irq_src *source,
5110                                  struct amdgpu_iv_entry *entry)
5111 {
5112         DRM_ERROR("Illegal register access in command stream\n");
5113         gfx_v9_0_fault(adev, entry);
5114         return 0;
5115 }
5116
5117 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5118                                   struct amdgpu_irq_src *source,
5119                                   struct amdgpu_iv_entry *entry)
5120 {
5121         DRM_ERROR("Illegal instruction in command stream\n");
5122         gfx_v9_0_fault(adev, entry);
5123         return 0;
5124 }
5125
5126 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
5127                 struct amdgpu_iv_entry *entry)
5128 {
5129         /* TODO: a UE (uncorrectable error) will trigger an interrupt. */
5130         kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
5131         amdgpu_ras_reset_gpu(adev, 0);
5132         return AMDGPU_RAS_UE;
5133 }
5134
5135 static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
5136                                   struct amdgpu_irq_src *source,
5137                                   struct amdgpu_iv_entry *entry)
5138 {
5139         struct ras_common_if *ras_if = adev->gfx.ras_if;
5140         struct ras_dispatch_if ih_data = {
5141                 .entry = entry,
5142         };
5143
5144         if (!ras_if)
5145                 return 0;
5146
5147         ih_data.head = *ras_if;
5148
5149         DRM_ERROR("CP ECC ERROR IRQ\n");
5150         amdgpu_ras_interrupt_dispatch(adev, &ih_data);
5151         return 0;
5152 }
5153
5154 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
5155         .name = "gfx_v9_0",
5156         .early_init = gfx_v9_0_early_init,
5157         .late_init = gfx_v9_0_late_init,
5158         .sw_init = gfx_v9_0_sw_init,
5159         .sw_fini = gfx_v9_0_sw_fini,
5160         .hw_init = gfx_v9_0_hw_init,
5161         .hw_fini = gfx_v9_0_hw_fini,
5162         .suspend = gfx_v9_0_suspend,
5163         .resume = gfx_v9_0_resume,
5164         .is_idle = gfx_v9_0_is_idle,
5165         .wait_for_idle = gfx_v9_0_wait_for_idle,
5166         .soft_reset = gfx_v9_0_soft_reset,
5167         .set_clockgating_state = gfx_v9_0_set_clockgating_state,
5168         .set_powergating_state = gfx_v9_0_set_powergating_state,
5169         .get_clockgating_state = gfx_v9_0_get_clockgating_state,
5170 };
5171
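/*
 * Ring backends. emit_frame_size is a worst-case dword budget for one frame
 * (itemized per packet below) and emit_ib_size the per-IB overhead; the
 * common IB scheduling code uses these when reserving ring space, so the
 * counts are upper bounds rather than exact emission sizes.
 */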
5172 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
5173         .type = AMDGPU_RING_TYPE_GFX,
5174         .align_mask = 0xff,
5175         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5176         .support_64bit_ptrs = true,
5177         .vmhub = AMDGPU_GFXHUB,
5178         .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
5179         .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
5180         .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
5181         .emit_frame_size = /* totally 242 maximum if 16 IBs */
5182                 5 +  /* COND_EXEC */
5183                 7 +  /* PIPELINE_SYNC */
5184                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5185                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5186                 2 + /* VM_FLUSH */
5187                 8 +  /* FENCE for VM_FLUSH */
5188                 20 + /* GDS switch */
5189                 4 + /* double SWITCH_BUFFER,
5190                        the first COND_EXEC jumps to the place just
5191                        prior to this double SWITCH_BUFFER */
5192                 5 + /* COND_EXEC */
5193                 7 + /* HDP_flush */
5194                 4 + /* VGT_flush */
5195                 14 + /* CE_META */
5196                 31 + /* DE_META */
5197                 3 + /* CNTX_CTRL */
5198                 5 + /* HDP_INVL */
5199                 8 + 8 + /* FENCE x2 */
5200                 2, /* SWITCH_BUFFER */
5201         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
5202         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
5203         .emit_fence = gfx_v9_0_ring_emit_fence,
5204         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
5205         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
5206         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
5207         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
5208         .test_ring = gfx_v9_0_ring_test_ring,
5209         .test_ib = gfx_v9_0_ring_test_ib,
5210         .insert_nop = amdgpu_ring_insert_nop,
5211         .pad_ib = amdgpu_ring_generic_pad_ib,
5212         .emit_switch_buffer = gfx_v9_ring_emit_sb,
5213         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
5214         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
5215         .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
5216         .emit_tmz = gfx_v9_0_ring_emit_tmz,
5217         .emit_wreg = gfx_v9_0_ring_emit_wreg,
5218         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5219         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5220         .soft_recovery = gfx_v9_0_ring_soft_recovery,
5221 };
5222
5223 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
5224         .type = AMDGPU_RING_TYPE_COMPUTE,
5225         .align_mask = 0xff,
5226         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5227         .support_64bit_ptrs = true,
5228         .vmhub = AMDGPU_GFXHUB,
5229         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
5230         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
5231         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
5232         .emit_frame_size =
5233                 20 + /* gfx_v9_0_ring_emit_gds_switch */
5234                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
5235                 5 + /* hdp invalidate */
5236                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
5237                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5238                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5239                 2 + /* gfx_v9_0_ring_emit_vm_flush */
5240                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
5241         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
5242         .emit_ib = gfx_v9_0_ring_emit_ib_compute,
5243         .emit_fence = gfx_v9_0_ring_emit_fence,
5244         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
5245         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
5246         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
5247         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
5248         .test_ring = gfx_v9_0_ring_test_ring,
5249         .test_ib = gfx_v9_0_ring_test_ib,
5250         .insert_nop = amdgpu_ring_insert_nop,
5251         .pad_ib = amdgpu_ring_generic_pad_ib,
5252         .set_priority = gfx_v9_0_ring_set_priority_compute,
5253         .emit_wreg = gfx_v9_0_ring_emit_wreg,
5254         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5255         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5256 };
5257
5258 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
5259         .type = AMDGPU_RING_TYPE_KIQ,
5260         .align_mask = 0xff,
5261         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5262         .support_64bit_ptrs = true,
5263         .vmhub = AMDGPU_GFXHUB,
5264         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
5265         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
5266         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
5267         .emit_frame_size =
5268                 20 + /* gfx_v9_0_ring_emit_gds_switch */
5269                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
5270                 5 + /* hdp invalidate */
5271                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
5272                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5273                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5274                 2 + /* gfx_v9_0_ring_emit_vm_flush */
5275                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
5276         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
5277         .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
5278         .test_ring = gfx_v9_0_ring_test_ring,
5279         .insert_nop = amdgpu_ring_insert_nop,
5280         .pad_ib = amdgpu_ring_generic_pad_ib,
5281         .emit_rreg = gfx_v9_0_ring_emit_rreg,
5282         .emit_wreg = gfx_v9_0_ring_emit_wreg,
5283         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5284         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5285 };
5286
5287 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
5288 {
5289         int i;
5290
5291         adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
5292
5293         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5294                 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
5295
5296         for (i = 0; i < adev->gfx.num_compute_rings; i++)
5297                 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
5298 }
5299
5300 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
5301         .set = gfx_v9_0_set_eop_interrupt_state,
5302         .process = gfx_v9_0_eop_irq,
5303 };
5304
5305 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
5306         .set = gfx_v9_0_set_priv_reg_fault_state,
5307         .process = gfx_v9_0_priv_reg_irq,
5308 };
5309
5310 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
5311         .set = gfx_v9_0_set_priv_inst_fault_state,
5312         .process = gfx_v9_0_priv_inst_irq,
5313 };
5314
5315 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
5316         .set = gfx_v9_0_set_cp_ecc_error_state,
5317         .process = gfx_v9_0_cp_ecc_error_irq,
5318 };
5319
5320
5321 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
5322 {
5323         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5324         adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
5325
5326         adev->gfx.priv_reg_irq.num_types = 1;
5327         adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
5328
5329         adev->gfx.priv_inst_irq.num_types = 1;
5330         adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
5331
5332         adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
5333         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
5334 }
5335
5336 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
5337 {
5338         switch (adev->asic_type) {
5339         case CHIP_VEGA10:
5340         case CHIP_VEGA12:
5341         case CHIP_VEGA20:
5342         case CHIP_RAVEN:
5343                 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
5344                 break;
5345         default:
5346                 break;
5347         }
5348 }
5349
5350 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
5351 {
5352         /* init ASIC GDS info */
5353         switch (adev->asic_type) {
5354         case CHIP_VEGA10:
5355         case CHIP_VEGA12:
5356         case CHIP_VEGA20:
5357                 adev->gds.gds_size = 0x10000;
5358                 break;
5359         case CHIP_RAVEN:
5360                 adev->gds.gds_size = 0x1000;
5361                 break;
5362         default:
5363                 adev->gds.gds_size = 0x10000;
5364                 break;
5365         }
5366
5367         switch (adev->asic_type) {
5368         case CHIP_VEGA10:
5369         case CHIP_VEGA20:
5370                 adev->gds.gds_compute_max_wave_id = 0x7ff;
5371                 break;
5372         case CHIP_VEGA12:
5373                 adev->gds.gds_compute_max_wave_id = 0x27f;
5374                 break;
5375         case CHIP_RAVEN:
5376                 if (adev->rev_id >= 0x8)
5377                         adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
5378                 else
5379                         adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
5380                 break;
5381         default:
5382                 /* this really depends on the chip */
5383                 adev->gds.gds_compute_max_wave_id = 0x7ff;
5384                 break;
5385         }
5386
5387         adev->gds.gws_size = 64;
5388         adev->gds.oa_size = 16;
5389 }
5390
5391 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
5392                                                  u32 bitmap)
5393 {
5394         u32 data;
5395
5396         if (!bitmap)
5397                 return;
5398
5399         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5400         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5401
5402         WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
5403 }
5404
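/*
 * Returns the active-CU bitmap for the SE/SH currently selected via
 * gfx_v9_0_select_se_sh(): the harvested (CC_GC_*) and user-disabled
 * (GC_USER_*) inactive-CU bits are OR'd together, then inverted and clamped
 * to max_cu_per_sh, effectively:
 *
 *   active = ~(fuse_inactive | user_inactive) & ((1 << max_cu_per_sh) - 1)
 */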
5405 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
5406 {
5407         u32 data, mask;
5408
5409         data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
5410         data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
5411
5412         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5413         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5414
5415         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
5416
5417         return (~data) & mask;
5418 }
5419
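/*
 * Walks every SE/SH pair, applies any user-requested CU disable masks
 * (as parsed by amdgpu_gfx_parse_disable_cu(); a 4 SE x 2 SH layout is
 * assumed here), and collects the per-SH active-CU bitmaps, the total
 * active CU count and the always-on CU mask reported to the rest of the
 * driver.
 */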
5420 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
5421                                  struct amdgpu_cu_info *cu_info)
5422 {
5423         int i, j, k, counter, active_cu_number = 0;
5424         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5425         unsigned disable_masks[4 * 2];
5426
5427         if (!adev || !cu_info)
5428                 return -EINVAL;
5429
5430         amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
5431
5432         mutex_lock(&adev->grbm_idx_mutex);
5433         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5434                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5435                         mask = 1;
5436                         ao_bitmap = 0;
5437                         counter = 0;
5438                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
5439                         if (i < 4 && j < 2)
5440                                 gfx_v9_0_set_user_cu_inactive_bitmap(
5441                                         adev, disable_masks[i * 2 + j]);
5442                         bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
5443                         cu_info->bitmap[i][j] = bitmap;
5444
5445                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
5446                                 if (bitmap & mask) {
5447                                         if (counter < adev->gfx.config.max_cu_per_sh)
5448                                                 ao_bitmap |= mask;
5449                                         counter++;
5450                                 }
5451                                 mask <<= 1;
5452                         }
5453                         active_cu_number += counter;
5454                         if (i < 2 && j < 2)
5455                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5456                         cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
5457                 }
5458         }
5459         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5460         mutex_unlock(&adev->grbm_idx_mutex);
5461
5462         cu_info->number = active_cu_number;
5463         cu_info->ao_cu_mask = ao_cu_mask;
5464         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
5465
5466         return 0;
5467 }
5468
5469 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
5470 {
5471         .type = AMD_IP_BLOCK_TYPE_GFX,
5472         .major = 9,
5473         .minor = 0,
5474         .rev = 0,
5475         .funcs = &gfx_v9_0_ip_funcs,
5476 };