linux.git: drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
(blob as of commit "drm/amdgpu/gfx9: use reset default for PA_SC_FIFO_SIZE")
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 #include "vega10_enum.h"
40 #include "hdp/hdp_4_0_offset.h"
41
42 #include "soc15.h"
43 #include "soc15_common.h"
44 #include "clearstate_gfx9.h"
45 #include "v9_structs.h"
46
47 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
48
49 #include "amdgpu_ras.h"
50
51 #define GFX9_NUM_GFX_RINGS     1
52 #define GFX9_MEC_HPD_SIZE 4096
53 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
54 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
55
56 #define mmPWR_MISC_CNTL_STATUS                                  0x0183
57 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX                         0
58 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT        0x0
59 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT          0x1
60 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK          0x00000001L
61 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK            0x00000006L
62
63 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
64 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
65 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
66 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
67 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
68 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
69
70 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
71 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
72 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
73 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
76
77 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
78 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
79 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
80 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
81 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
82 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
83
84 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
85 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
86 MODULE_FIRMWARE("amdgpu/raven_me.bin");
87 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
88 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
89 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
90
91 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
92 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
93 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
94 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
95 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
98
99 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
100 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
101 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
102 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
103 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
104 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
105 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
106
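/*
 * "Golden" register settings: per-ASIC overrides programmed on top of the
 * hardware reset defaults at init time.  Each entry names a register, the
 * mask of bits to touch and the value to program into those bits.
 */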
107 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
108 {
109         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
110         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
111         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
112         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
113         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
114         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
115         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
116         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
117         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
118         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
119         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
120         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
121         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
122         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
123         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
124         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
125         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
126         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
127         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
128         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
129 };
130
131 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
132 {
133         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
134         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
135         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
136         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
137         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
138         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
139         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
140         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
141         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
142         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
143         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
144         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
145         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
146         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
147         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
148         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
149         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
150         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
151 };
152
153 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
154 {
155         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
156         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
157         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
158         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
159         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
160         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
161         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
162         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
163         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
164         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
165         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
166 };
167
168 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
169 {
170         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
171         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
172         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
173         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
174         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
175         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
176         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
177         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
178         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
179         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
180         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
181         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
182         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
183         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
184         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
185         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
186         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
187         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
188         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
189         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
190         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
191         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
192         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
193         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
194 };
195
196 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
197 {
198         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
199         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
200         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
201         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
202         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
203         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
204         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
205 };
206
207 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
208 {
209         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
210         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
211         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
212         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
213         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
214         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
215         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
216         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
217         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
218         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
219         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
220         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
221         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
222         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
223         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
224         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
225         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
226         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
227         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
228 };
229
230 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
231 {
232         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
233         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
234         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
235 };
236
237 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
238 {
239         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
240         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
241         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
242         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
243         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
244         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
245         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
246         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
247         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
248         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
249         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
250         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
251         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
252         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
253         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
254         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
255 };
256
257 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
258 {
259         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
260         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
261         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
262         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
263         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
264         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
265         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
266         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
267         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
268         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
269         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
270         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
271         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
272 };
273
274 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
275 {
276         mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
277         mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
278         mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
279         mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
280         mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
281         mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
282         mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
283         mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
284 };
285
286 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
287 {
288         mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
289         mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
290         mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
291         mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
292         mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
293         mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
294         mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
295         mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
296 };
297
298 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
299 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
300 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
301 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
302
303 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
304 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
305 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
306 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
307 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
308                                  struct amdgpu_cu_info *cu_info);
309 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
310 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
311 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
312 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
313
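/*
 * Program the golden register settings for the current ASIC: the per-family
 * tables above, followed by the settings common to all gfx9 parts.
 */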
314 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
315 {
316         switch (adev->asic_type) {
317         case CHIP_VEGA10:
318                 if (!amdgpu_virt_support_skip_setting(adev)) {
319                         soc15_program_register_sequence(adev,
320                                                          golden_settings_gc_9_0,
321                                                          ARRAY_SIZE(golden_settings_gc_9_0));
322                         soc15_program_register_sequence(adev,
323                                                          golden_settings_gc_9_0_vg10,
324                                                          ARRAY_SIZE(golden_settings_gc_9_0_vg10));
325                 }
326                 break;
327         case CHIP_VEGA12:
328                 soc15_program_register_sequence(adev,
329                                                 golden_settings_gc_9_2_1,
330                                                 ARRAY_SIZE(golden_settings_gc_9_2_1));
331                 soc15_program_register_sequence(adev,
332                                                 golden_settings_gc_9_2_1_vg12,
333                                                 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
334                 break;
335         case CHIP_VEGA20:
336                 soc15_program_register_sequence(adev,
337                                                 golden_settings_gc_9_0,
338                                                 ARRAY_SIZE(golden_settings_gc_9_0));
339                 soc15_program_register_sequence(adev,
340                                                 golden_settings_gc_9_0_vg20,
341                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg20));
342                 break;
343         case CHIP_RAVEN:
344                 soc15_program_register_sequence(adev, golden_settings_gc_9_1,
345                                                 ARRAY_SIZE(golden_settings_gc_9_1));
346                 if (adev->rev_id >= 8)
347                         soc15_program_register_sequence(adev,
348                                                         golden_settings_gc_9_1_rv2,
349                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv2));
350                 else
351                         soc15_program_register_sequence(adev,
352                                                         golden_settings_gc_9_1_rv1,
353                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv1));
354                 break;
355         default:
356                 break;
357         }
358
359         soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
360                                         (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
361 }
362
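/* Reserve eight SCRATCH_REG registers, used for things like the ring test below. */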
363 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
364 {
365         adev->gfx.scratch.num_reg = 8;
366         adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
367         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
368 }
369
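/*
 * Emit a WRITE_DATA packet that writes @val to register @reg from the given
 * engine, optionally requesting a write confirmation.
 */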
370 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
371                                        bool wc, uint32_t reg, uint32_t val)
372 {
373         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
374         amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
375                                 WRITE_DATA_DST_SEL(0) |
376                                 (wc ? WR_CONFIRM : 0));
377         amdgpu_ring_write(ring, reg);
378         amdgpu_ring_write(ring, 0);
379         amdgpu_ring_write(ring, val);
380 }
381
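/*
 * Emit a WAIT_REG_MEM packet: poll a register (mem_space == 0) or a memory
 * location (mem_space == 1) until (value & @mask) == @ref, checking every
 * @inv interval.
 */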
382 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
383                                   int mem_space, int opt, uint32_t addr0,
384                                   uint32_t addr1, uint32_t ref, uint32_t mask,
385                                   uint32_t inv)
386 {
387         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
388         amdgpu_ring_write(ring,
389                                  /* memory (1) or register (0) */
390                                  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
391                                  WAIT_REG_MEM_OPERATION(opt) | /* wait */
392                                  WAIT_REG_MEM_FUNCTION(3) |  /* equal */
393                                  WAIT_REG_MEM_ENGINE(eng_sel)));
394
395         if (mem_space)
396                 BUG_ON(addr0 & 0x3); /* Dword align */
397         amdgpu_ring_write(ring, addr0);
398         amdgpu_ring_write(ring, addr1);
399         amdgpu_ring_write(ring, ref);
400         amdgpu_ring_write(ring, mask);
401         amdgpu_ring_write(ring, inv); /* poll interval */
402 }
403
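/*
 * Basic ring test: write a magic value to a scratch register through the
 * ring and poll until it shows up (or the timeout expires).
 */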
404 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
405 {
406         struct amdgpu_device *adev = ring->adev;
407         uint32_t scratch;
408         uint32_t tmp = 0;
409         unsigned i;
410         int r;
411
412         r = amdgpu_gfx_scratch_get(adev, &scratch);
413         if (r)
414                 return r;
415
416         WREG32(scratch, 0xCAFEDEAD);
417         r = amdgpu_ring_alloc(ring, 3);
418         if (r)
419                 goto error_free_scratch;
420
421         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
422         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
423         amdgpu_ring_write(ring, 0xDEADBEEF);
424         amdgpu_ring_commit(ring);
425
426         for (i = 0; i < adev->usec_timeout; i++) {
427                 tmp = RREG32(scratch);
428                 if (tmp == 0xDEADBEEF)
429                         break;
430                 udelay(1);
431         }
432
433         if (i >= adev->usec_timeout)
434                 r = -ETIMEDOUT;
435
436 error_free_scratch:
437         amdgpu_gfx_scratch_free(adev, scratch);
438         return r;
439 }
440
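/*
 * IB test: submit a small indirect buffer that writes a magic value to a
 * writeback slot, then wait on the fence and check the result.
 */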
441 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
442 {
443         struct amdgpu_device *adev = ring->adev;
444         struct amdgpu_ib ib;
445         struct dma_fence *f = NULL;
446
447         unsigned index;
448         uint64_t gpu_addr;
449         uint32_t tmp;
450         long r;
451
452         r = amdgpu_device_wb_get(adev, &index);
453         if (r)
454                 return r;
455
456         gpu_addr = adev->wb.gpu_addr + (index * 4);
457         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
458         memset(&ib, 0, sizeof(ib));
459         r = amdgpu_ib_get(adev, NULL, 16, &ib);
460         if (r)
461                 goto err1;
462
463         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
464         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
465         ib.ptr[2] = lower_32_bits(gpu_addr);
466         ib.ptr[3] = upper_32_bits(gpu_addr);
467         ib.ptr[4] = 0xDEADBEEF;
468         ib.length_dw = 5;
469
470         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
471         if (r)
472                 goto err2;
473
474         r = dma_fence_wait_timeout(f, false, timeout);
475         if (r == 0) {
476                 r = -ETIMEDOUT;
477                 goto err2;
478         } else if (r < 0) {
479                 goto err2;
480         }
481
482         tmp = adev->wb.wb[index];
483         if (tmp == 0xDEADBEEF)
484                 r = 0;
485         else
486                 r = -EINVAL;
487
488 err2:
489         amdgpu_ib_free(adev, &ib, NULL);
490         dma_fence_put(f);
491 err1:
492         amdgpu_device_wb_free(adev, index);
493         return r;
494 }
495
496
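/* Drop all gfx firmware blobs and the RLC register list copy. */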
497 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
498 {
499         release_firmware(adev->gfx.pfp_fw);
500         adev->gfx.pfp_fw = NULL;
501         release_firmware(adev->gfx.me_fw);
502         adev->gfx.me_fw = NULL;
503         release_firmware(adev->gfx.ce_fw);
504         adev->gfx.ce_fw = NULL;
505         release_firmware(adev->gfx.rlc_fw);
506         adev->gfx.rlc_fw = NULL;
507         release_firmware(adev->gfx.mec_fw);
508         adev->gfx.mec_fw = NULL;
509         release_firmware(adev->gfx.mec2_fw);
510         adev->gfx.mec2_fw = NULL;
511
512         kfree(adev->gfx.rlc.register_list_format);
513 }
514
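/*
 * Parse the v2.1 RLC firmware header: record the save/restore list
 * CNTL/GPM/SRM blobs, their sizes and their ucode/feature versions.
 */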
515 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
516 {
517         const struct rlc_firmware_header_v2_1 *rlc_hdr;
518
519         rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
520         adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
521         adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
522         adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
523         adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
524         adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
525         adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
526         adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
527         adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
528         adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
529         adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
530         adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
531         adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
532         adev->gfx.rlc.reg_list_format_direct_reg_list_length =
533                         le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
534 }
535
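/*
 * Check, per ASIC, whether the ME/PFP and MEC firmware are new enough to
 * handle register write-then-wait sequences in firmware; the flags set
 * here are consulted when such sequences are emitted.
 */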
536 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
537 {
538         adev->gfx.me_fw_write_wait = false;
539         adev->gfx.mec_fw_write_wait = false;
540
541         switch (adev->asic_type) {
542         case CHIP_VEGA10:
543                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
544                     (adev->gfx.me_feature_version >= 42) &&
545                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
546                     (adev->gfx.pfp_feature_version >= 42))
547                         adev->gfx.me_fw_write_wait = true;
548
549                 if ((adev->gfx.mec_fw_version >=  0x00000193) &&
550                     (adev->gfx.mec_feature_version >= 42))
551                         adev->gfx.mec_fw_write_wait = true;
552                 break;
553         case CHIP_VEGA12:
554                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
555                     (adev->gfx.me_feature_version >= 44) &&
556                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
557                     (adev->gfx.pfp_feature_version >= 44))
558                         adev->gfx.me_fw_write_wait = true;
559
560                 if ((adev->gfx.mec_fw_version >=  0x00000196) &&
561                     (adev->gfx.mec_feature_version >= 44))
562                         adev->gfx.mec_fw_write_wait = true;
563                 break;
564         case CHIP_VEGA20:
565                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
566                     (adev->gfx.me_feature_version >= 44) &&
567                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
568                     (adev->gfx.pfp_feature_version >= 44))
569                         adev->gfx.me_fw_write_wait = true;
570
571                 if ((adev->gfx.mec_fw_version >=  0x00000197) &&
572                     (adev->gfx.mec_feature_version >= 44))
573                         adev->gfx.mec_fw_write_wait = true;
574                 break;
575         case CHIP_RAVEN:
576                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
577                     (adev->gfx.me_feature_version >= 42) &&
578                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
579                     (adev->gfx.pfp_feature_version >= 42))
580                         adev->gfx.me_fw_write_wait = true;
581
582                 if ((adev->gfx.mec_fw_version >=  0x00000192) &&
583                     (adev->gfx.mec_feature_version >= 42))
584                         adev->gfx.mec_fw_write_wait = true;
585                 break;
586         default:
587                 break;
588         }
589 }
590
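/*
 * GFXOFF is only kept enabled on Raven when the RLC firmware is known to
 * support it; older or known-bad RLC versions cause it to be masked off.
 */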
591 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
592 {
593         switch (adev->asic_type) {
594         case CHIP_VEGA10:
595         case CHIP_VEGA12:
596         case CHIP_VEGA20:
597                 break;
598         case CHIP_RAVEN:
599                 if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
600                         break;
601                 if ((adev->gfx.rlc_fw_version != 106 &&
602                      adev->gfx.rlc_fw_version < 531) ||
603                     (adev->gfx.rlc_fw_version == 53815) ||
604                     (adev->gfx.rlc_feature_version < 1) ||
605                     !adev->gfx.rlc.is_rlc_v2_1)
606                         adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
607                 break;
608         default:
609                 break;
610         }
611 }
612
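/*
 * Fetch and validate all gfx microcode for this ASIC (PFP, ME, CE, RLC,
 * MEC and optional MEC2) and, when the PSP loads firmware, register each
 * blob in the ucode table so it gets uploaded.
 */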
613 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
614 {
615         const char *chip_name;
616         char fw_name[30];
617         int err;
618         struct amdgpu_firmware_info *info = NULL;
619         const struct common_firmware_header *header = NULL;
620         const struct gfx_firmware_header_v1_0 *cp_hdr;
621         const struct rlc_firmware_header_v2_0 *rlc_hdr;
622         unsigned int *tmp = NULL;
623         unsigned int i = 0;
624         uint16_t version_major;
625         uint16_t version_minor;
626         uint32_t smu_version;
627
628         DRM_DEBUG("\n");
629
630         switch (adev->asic_type) {
631         case CHIP_VEGA10:
632                 chip_name = "vega10";
633                 break;
634         case CHIP_VEGA12:
635                 chip_name = "vega12";
636                 break;
637         case CHIP_VEGA20:
638                 chip_name = "vega20";
639                 break;
640         case CHIP_RAVEN:
641                 if (adev->rev_id >= 8)
642                         chip_name = "raven2";
643                 else if (adev->pdev->device == 0x15d8)
644                         chip_name = "picasso";
645                 else
646                         chip_name = "raven";
647                 break;
648         default:
649                 BUG();
650         }
651
652         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
653         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
654         if (err)
655                 goto out;
656         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
657         if (err)
658                 goto out;
659         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
660         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
661         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
662
663         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
664         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
665         if (err)
666                 goto out;
667         err = amdgpu_ucode_validate(adev->gfx.me_fw);
668         if (err)
669                 goto out;
670         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
671         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
672         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
673
674         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
675         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
676         if (err)
677                 goto out;
678         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
679         if (err)
680                 goto out;
681         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
682         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
683         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
684
685         /*
686          * For Picasso on an AM4 socket board, use picasso_rlc_am4.bin
687          * instead of picasso_rlc.bin.
688          * How to tell them apart:
689          * PCO AM4: revision >= 0xC8 && revision <= 0xCF
690          *          or revision >= 0xD8 && revision <= 0xDF
691          * otherwise it is PCO FP5
692          */
693         if (!strcmp(chip_name, "picasso") &&
694                 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
695                 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
696                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
697         else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
698                 (smu_version >= 0x41e2b))
699                 /*
700                  * SMC is loaded by the SBIOS on APUs, so the SMU version can be queried directly.
701                  */
702                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
703         else
704                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
705         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
706         if (err)
707                 goto out;
708         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
            if (err)
                    goto out;
709         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
710
711         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
712         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
713         if (version_major == 2 && version_minor == 1)
714                 adev->gfx.rlc.is_rlc_v2_1 = true;
715
716         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
717         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
718         adev->gfx.rlc.save_and_restore_offset =
719                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
720         adev->gfx.rlc.clear_state_descriptor_offset =
721                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
722         adev->gfx.rlc.avail_scratch_ram_locations =
723                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
724         adev->gfx.rlc.reg_restore_list_size =
725                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
726         adev->gfx.rlc.reg_list_format_start =
727                         le32_to_cpu(rlc_hdr->reg_list_format_start);
728         adev->gfx.rlc.reg_list_format_separate_start =
729                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
730         adev->gfx.rlc.starting_offsets_start =
731                         le32_to_cpu(rlc_hdr->starting_offsets_start);
732         adev->gfx.rlc.reg_list_format_size_bytes =
733                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
734         adev->gfx.rlc.reg_list_size_bytes =
735                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
736         adev->gfx.rlc.register_list_format =
737                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
738                                 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
739         if (!adev->gfx.rlc.register_list_format) {
740                 err = -ENOMEM;
741                 goto out;
742         }
743
744         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
745                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
746         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
747                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
748
749         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
750
751         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
752                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
753         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
754                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
755
756         if (adev->gfx.rlc.is_rlc_v2_1)
757                 gfx_v9_0_init_rlc_ext_microcode(adev);
758
759         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
760         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
761         if (err)
762                 goto out;
763         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
764         if (err)
765                 goto out;
766         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
767         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
768         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
769
770
771         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
772         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
773         if (!err) {
774                 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
775                 if (err)
776                         goto out;
777                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
778                         adev->gfx.mec2_fw->data;
779                 adev->gfx.mec2_fw_version =
780                         le32_to_cpu(cp_hdr->header.ucode_version);
781                 adev->gfx.mec2_feature_version =
782                         le32_to_cpu(cp_hdr->ucode_feature_version);
783         } else {
784                 err = 0;
785                 adev->gfx.mec2_fw = NULL;
786         }
787
788         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
789                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
790                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
791                 info->fw = adev->gfx.pfp_fw;
792                 header = (const struct common_firmware_header *)info->fw->data;
793                 adev->firmware.fw_size +=
794                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
795
796                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
797                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
798                 info->fw = adev->gfx.me_fw;
799                 header = (const struct common_firmware_header *)info->fw->data;
800                 adev->firmware.fw_size +=
801                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
802
803                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
804                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
805                 info->fw = adev->gfx.ce_fw;
806                 header = (const struct common_firmware_header *)info->fw->data;
807                 adev->firmware.fw_size +=
808                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
809
810                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
811                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
812                 info->fw = adev->gfx.rlc_fw;
813                 header = (const struct common_firmware_header *)info->fw->data;
814                 adev->firmware.fw_size +=
815                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
816
817                 if (adev->gfx.rlc.is_rlc_v2_1 &&
818                     adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
819                     adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
820                     adev->gfx.rlc.save_restore_list_srm_size_bytes) {
821                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
822                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
823                         info->fw = adev->gfx.rlc_fw;
824                         adev->firmware.fw_size +=
825                                 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
826
827                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
828                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
829                         info->fw = adev->gfx.rlc_fw;
830                         adev->firmware.fw_size +=
831                                 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
832
833                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
834                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
835                         info->fw = adev->gfx.rlc_fw;
836                         adev->firmware.fw_size +=
837                                 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
838                 }
839
840                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
841                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
842                 info->fw = adev->gfx.mec_fw;
843                 header = (const struct common_firmware_header *)info->fw->data;
844                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
845                 adev->firmware.fw_size +=
846                         ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
847
848                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
849                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
850                 info->fw = adev->gfx.mec_fw;
851                 adev->firmware.fw_size +=
852                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
853
854                 if (adev->gfx.mec2_fw) {
855                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
856                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
857                         info->fw = adev->gfx.mec2_fw;
858                         header = (const struct common_firmware_header *)info->fw->data;
859                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
860                         adev->firmware.fw_size +=
861                                 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
862                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
863                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
864                         info->fw = adev->gfx.mec2_fw;
865                         adev->firmware.fw_size +=
866                                 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
867                 }
868
869         }
870
871 out:
872         gfx_v9_0_check_if_need_gfxoff(adev);
873         gfx_v9_0_check_fw_write_wait(adev);
874         if (err) {
875                 dev_err(adev->dev,
876                         "gfx9: Failed to load firmware \"%s\"\n",
877                         fw_name);
878                 release_firmware(adev->gfx.pfp_fw);
879                 adev->gfx.pfp_fw = NULL;
880                 release_firmware(adev->gfx.me_fw);
881                 adev->gfx.me_fw = NULL;
882                 release_firmware(adev->gfx.ce_fw);
883                 adev->gfx.ce_fw = NULL;
884                 release_firmware(adev->gfx.rlc_fw);
885                 adev->gfx.rlc_fw = NULL;
886                 release_firmware(adev->gfx.mec_fw);
887                 adev->gfx.mec_fw = NULL;
888                 release_firmware(adev->gfx.mec2_fw);
889                 adev->gfx.mec2_fw = NULL;
890         }
891         return err;
892 }
893
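/* Size, in dwords, of the clear-state buffer built from gfx9_cs_data. */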
894 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
895 {
896         u32 count = 0;
897         const struct cs_section_def *sect = NULL;
898         const struct cs_extent_def *ext = NULL;
899
900         /* begin clear state */
901         count += 2;
902         /* context control state */
903         count += 3;
904
905         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
906                 for (ext = sect->section; ext->extent != NULL; ++ext) {
907                         if (sect->id == SECT_CONTEXT)
908                                 count += 2 + ext->reg_count;
909                         else
910                                 return 0;
911                 }
912         }
913
914         /* end clear state */
915         count += 2;
916         /* clear state */
917         count += 2;
918
919         return count;
920 }
921
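/*
 * Build the clear-state indirect buffer: preamble begin, context control,
 * one SET_CONTEXT_REG run per extent in the cs_data table, preamble end
 * and a final CLEAR_STATE packet.
 */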
922 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
923                                     volatile u32 *buffer)
924 {
925         u32 count = 0, i;
926         const struct cs_section_def *sect = NULL;
927         const struct cs_extent_def *ext = NULL;
928
929         if (adev->gfx.rlc.cs_data == NULL)
930                 return;
931         if (buffer == NULL)
932                 return;
933
934         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
935         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
936
937         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
938         buffer[count++] = cpu_to_le32(0x80000000);
939         buffer[count++] = cpu_to_le32(0x80000000);
940
941         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
942                 for (ext = sect->section; ext->extent != NULL; ++ext) {
943                         if (sect->id == SECT_CONTEXT) {
944                                 buffer[count++] =
945                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
946                                 buffer[count++] = cpu_to_le32(ext->reg_index -
947                                                 PACKET3_SET_CONTEXT_REG_START);
948                                 for (i = 0; i < ext->reg_count; i++)
949                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
950                         } else {
951                                 return;
952                         }
953                 }
954         }
955
956         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
957         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
958
959         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
960         buffer[count++] = cpu_to_le32(0);
961 }
962
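/*
 * Pick the set of always-on CUs per SE/SH (the count depends on the ASIC)
 * and program it into the RLC always-on / always-active CU masks.
 */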
963 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
964 {
965         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
966         uint32_t pg_always_on_cu_num = 2;
967         uint32_t always_on_cu_num;
968         uint32_t i, j, k;
969         uint32_t mask, cu_bitmap, counter;
970
971         if (adev->flags & AMD_IS_APU)
972                 always_on_cu_num = 4;
973         else if (adev->asic_type == CHIP_VEGA12)
974                 always_on_cu_num = 8;
975         else
976                 always_on_cu_num = 12;
977
978         mutex_lock(&adev->grbm_idx_mutex);
979         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
980                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
981                         mask = 1;
982                         cu_bitmap = 0;
983                         counter = 0;
984                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
985
986                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
987                                 if (cu_info->bitmap[i][j] & mask) {
988                                         if (counter == pg_always_on_cu_num)
989                                                 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
990                                         if (counter < always_on_cu_num)
991                                                 cu_bitmap |= mask;
992                                         else
993                                                 break;
994                                         counter++;
995                                 }
996                                 mask <<= 1;
997                         }
998
999                         WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1000                         cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1001                 }
1002         }
1003         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1004         mutex_unlock(&adev->grbm_idx_mutex);
1005 }
1006
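/*
 * Program the RLC load-balancing (LBPW) thresholds and sampling parameters
 * used on Raven.
 */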
1007 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1008 {
1009         uint32_t data;
1010
1011         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1012         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1013         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1014         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1015         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1016
1017         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1018         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1019
1020         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1021         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1022
1023         mutex_lock(&adev->grbm_idx_mutex);
1024         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1025         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1026         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1027
1028         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1029         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1030         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1031         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1032         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1033
1034         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1035         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1036         data &= 0x0000FFFF;
1037         data |= 0x00C00000;
1038         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1039
1040         /*
1041          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1042          * programmed in gfx_v9_0_init_always_on_cu_mask()
1043          */
1044
1045         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1046          * but is used here for RLC_LB_CNTL configuration */
1047         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1048         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1049         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1050         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1051         mutex_unlock(&adev->grbm_idx_mutex);
1052
1053         gfx_v9_0_init_always_on_cu_mask(adev);
1054 }
1055
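/* Same sequence as gfx_v9_0_init_lbpw() with the values used on Vega20. */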
1056 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1057 {
1058         uint32_t data;
1059
1060         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1061         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1062         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1063         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1064         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1065
1066         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1067         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1068
1069         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1070         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1071
1072         mutex_lock(&adev->grbm_idx_mutex);
1073         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1074         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1075         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1076
1077         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1078         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1079         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1080         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1081         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1082
1083         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1084         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1085         data &= 0x0000FFFF;
1086         data |= 0x00C00000;
1087         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1088
1089         /*
1090          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1091          * programmed in gfx_v9_0_init_always_on_cu_mask()
1092          */
1093
1094         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1095          * but is used here for RLC_LB_CNTL configuration */
1096         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1097         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1098         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1099         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1100         mutex_unlock(&adev->grbm_idx_mutex);
1101
1102         gfx_v9_0_init_always_on_cu_mask(adev);
1103 }
1104
1105 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1106 {
1107         WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1108 }
1109
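/* Number of CP firmware jump table entries saved in the cp table. */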
1110 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1111 {
1112         return 5;
1113 }
1114
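/*
 * Set up the RLC: allocate the clear-state buffer, allocate the cp table on
 * Raven, and program the LBPW settings on Raven and Vega20.
 */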
1115 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1116 {
1117         const struct cs_section_def *cs_data;
1118         int r;
1119
1120         adev->gfx.rlc.cs_data = gfx9_cs_data;
1121
1122         cs_data = adev->gfx.rlc.cs_data;
1123
1124         if (cs_data) {
1125                 /* init clear state block */
1126                 r = amdgpu_gfx_rlc_init_csb(adev);
1127                 if (r)
1128                         return r;
1129         }
1130
1131         if (adev->asic_type == CHIP_RAVEN) {
1132                 /* TODO: double check the cp_table_size for RV */
1133                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1134                 r = amdgpu_gfx_rlc_init_cpt(adev);
1135                 if (r)
1136                         return r;
1137         }
1138
1139         switch (adev->asic_type) {
1140         case CHIP_RAVEN:
1141                 gfx_v9_0_init_lbpw(adev);
1142                 break;
1143         case CHIP_VEGA20:
1144                 gfx_v9_4_init_lbpw(adev);
1145                 break;
1146         default:
1147                 break;
1148         }
1149
1150         return 0;
1151 }
1152
1153 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
1154 {
1155         int r;
1156
1157         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1158         if (unlikely(r != 0))
1159                 return r;
1160
1161         r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1162                         AMDGPU_GEM_DOMAIN_VRAM);
1163         if (!r)
1164                 adev->gfx.rlc.clear_state_gpu_addr =
1165                         amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1166
1167         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1168
1169         return r;
1170 }
1171
1172 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
1173 {
1174         int r;
1175
1176         if (!adev->gfx.rlc.clear_state_obj)
1177                 return;
1178
1179         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1180         if (likely(r == 0)) {
1181                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1182                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1183         }
1184 }
1185
1186 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1187 {
1188         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1189         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1190 }
1191
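/*
 * Allocate the MEC resources: one HPD EOP area per enabled compute ring in
 * VRAM, plus a GTT buffer object holding a copy of the MEC microcode.
 */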
1192 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1193 {
1194         int r;
1195         u32 *hpd;
1196         const __le32 *fw_data;
1197         unsigned fw_size;
1198         u32 *fw;
1199         size_t mec_hpd_size;
1200
1201         const struct gfx_firmware_header_v1_0 *mec_hdr;
1202
1203         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1204
1205         /* take ownership of the relevant compute queues */
1206         amdgpu_gfx_compute_queue_acquire(adev);
1207         mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1208
1209         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1210                                       AMDGPU_GEM_DOMAIN_VRAM,
1211                                       &adev->gfx.mec.hpd_eop_obj,
1212                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1213                                       (void **)&hpd);
1214         if (r) {
1215                 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1216                 gfx_v9_0_mec_fini(adev);
1217                 return r;
1218         }
1219
1220         memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1221
1222         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1223         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1224
1225         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1226
1227         fw_data = (const __le32 *)
1228                 (adev->gfx.mec_fw->data +
1229                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1230         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1231
1232         r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1233                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1234                                       &adev->gfx.mec.mec_fw_obj,
1235                                       &adev->gfx.mec.mec_fw_gpu_addr,
1236                                       (void **)&fw);
1237         if (r) {
1238                 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1239                 gfx_v9_0_mec_fini(adev);
1240                 return r;
1241         }
1242
1243         memcpy(fw, fw_data, fw_size);
1244
1245         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1246         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1247
1248         return 0;
1249 }
1250
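/* Read one wave-context register via the SQ_IND_INDEX/SQ_IND_DATA pair. */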
1251 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1252 {
1253         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1254                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1255                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1256                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1257                 (SQ_IND_INDEX__FORCE_READ_MASK));
1258         return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1259 }
1260
1261 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1262                            uint32_t wave, uint32_t thread,
1263                            uint32_t regno, uint32_t num, uint32_t *out)
1264 {
1265         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1266                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1267                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1268                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1269                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1270                 (SQ_IND_INDEX__FORCE_READ_MASK) |
1271                 (SQ_IND_INDEX__AUTO_INCR_MASK));
1272         while (num--)
1273                 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1274 }
1275
1276 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1277 {
1278         /* type 1 wave data */
1279         dst[(*no_fields)++] = 1;
1280         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1281         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1282         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1283         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1284         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1285         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1286         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1287         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1288         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1289         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1290         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1291         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1292         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1293         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1294 }
1295
1296 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1297                                      uint32_t wave, uint32_t start,
1298                                      uint32_t size, uint32_t *dst)
1299 {
1300         wave_read_regs(
1301                 adev, simd, wave, 0,
1302                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1303 }
1304
1305 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1306                                      uint32_t wave, uint32_t thread,
1307                                      uint32_t start, uint32_t size,
1308                                      uint32_t *dst)
1309 {
1310         wave_read_regs(
1311                 adev, simd, wave, thread,
1312                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1313 }
1314
1315 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1316                                   u32 me, u32 pipe, u32 q)
1317 {
1318         soc15_grbm_select(adev, me, pipe, q, 0);
1319 }
1320
1321 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1322         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1323         .select_se_sh = &gfx_v9_0_select_se_sh,
1324         .read_wave_data = &gfx_v9_0_read_wave_data,
1325         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1326         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1327         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
1328 };
1329
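/*
 * Fill in the per-ASIC gfx config (hw context count, SC FIFO sizes,
 * GB_ADDR_CONFIG) and decode the GB_ADDR_CONFIG fields used elsewhere
 * in the driver.
 */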
1330 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1331 {
1332         u32 gb_addr_config;
1333         int err;
1334
1335         adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1336
1337         switch (adev->asic_type) {
1338         case CHIP_VEGA10:
1339                 adev->gfx.config.max_hw_contexts = 8;
1340                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1341                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1342                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1343                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1344                 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1345                 break;
1346         case CHIP_VEGA12:
1347                 adev->gfx.config.max_hw_contexts = 8;
1348                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1349                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1350                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1351                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1352                 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1353                 DRM_INFO("fix gfx.config for vega12\n");
1354                 break;
1355         case CHIP_VEGA20:
1356                 adev->gfx.config.max_hw_contexts = 8;
1357                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1358                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1359                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1360                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1361                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1362                 gb_addr_config &= ~0xf3e777ff;
1363                 gb_addr_config |= 0x22014042;
1364                 /* check vbios table if gpu info is not available */
1365                 err = amdgpu_atomfirmware_get_gfx_info(adev);
1366                 if (err)
1367                         return err;
1368                 break;
1369         case CHIP_RAVEN:
1370                 adev->gfx.config.max_hw_contexts = 8;
1371                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1372                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1373                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1374                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1375                 if (adev->rev_id >= 8)
1376                         gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1377                 else
1378                         gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1379                 break;
1380         default:
1381                 BUG();
1382                 break;
1383         }
1384
1385         adev->gfx.config.gb_addr_config = gb_addr_config;
1386
1387         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1388                         REG_GET_FIELD(
1389                                         adev->gfx.config.gb_addr_config,
1390                                         GB_ADDR_CONFIG,
1391                                         NUM_PIPES);
1392
1393         adev->gfx.config.max_tile_pipes =
1394                 adev->gfx.config.gb_addr_config_fields.num_pipes;
1395
1396         adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1397                         REG_GET_FIELD(
1398                                         adev->gfx.config.gb_addr_config,
1399                                         GB_ADDR_CONFIG,
1400                                         NUM_BANKS);
1401         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1402                         REG_GET_FIELD(
1403                                         adev->gfx.config.gb_addr_config,
1404                                         GB_ADDR_CONFIG,
1405                                         MAX_COMPRESSED_FRAGS);
1406         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1407                         REG_GET_FIELD(
1408                                         adev->gfx.config.gb_addr_config,
1409                                         GB_ADDR_CONFIG,
1410                                         NUM_RB_PER_SE);
1411         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1412                         REG_GET_FIELD(
1413                                         adev->gfx.config.gb_addr_config,
1414                                         GB_ADDR_CONFIG,
1415                                         NUM_SHADER_ENGINES);
1416         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1417                         REG_GET_FIELD(
1418                                         adev->gfx.config.gb_addr_config,
1419                                         GB_ADDR_CONFIG,
1420                                         PIPE_INTERLEAVE_SIZE));
1421
1422         return 0;
1423 }
1424
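/*
 * Allocate a single NGG buffer in VRAM, scaling the requested (or default)
 * per-SE size by the number of shader engines.
 */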
1425 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1426                                    struct amdgpu_ngg_buf *ngg_buf,
1427                                    int size_se,
1428                                    int default_size_se)
1429 {
1430         int r;
1431
1432         if (size_se < 0) {
1433                 dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1434                 return -EINVAL;
1435         }
1436         size_se = size_se ? size_se : default_size_se;
1437
1438         ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
1439         r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1440                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1441                                     &ngg_buf->bo,
1442                                     &ngg_buf->gpu_addr,
1443                                     NULL);
1444         if (r) {
1445                 dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1446                 return r;
1447         }
1448         ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
1449
1450         return r;
1451 }
1452
1453 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
1454 {
1455         int i;
1456
1457         for (i = 0; i < NGG_BUF_MAX; i++)
1458                 amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
1459                                       &adev->gfx.ngg.buf[i].gpu_addr,
1460                                       NULL);
1461
1462         memset(&adev->gfx.ngg.buf[0], 0,
1463                         sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
1464
1465         adev->gfx.ngg.init = false;
1466
1467         return 0;
1468 }
1469
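/*
 * Carve a small reserve out of the VMID0 GDS allocation and create the NGG
 * buffers (primitive, position, control sideband and, optionally, the
 * parameter cache), sized from the corresponding module parameters.
 */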
1470 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
1471 {
1472         int r;
1473
1474         if (!amdgpu_ngg || adev->gfx.ngg.init)
1475                 return 0;
1476
1477         /* GDS reserve memory: 64 bytes alignment */
1478         adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
1479         adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
1480         adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
1481         adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
1482
1483         /* Primitive Buffer */
1484         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
1485                                     amdgpu_prim_buf_per_se,
1486                                     64 * 1024);
1487         if (r) {
1488                 dev_err(adev->dev, "Failed to create Primitive Buffer\n");
1489                 goto err;
1490         }
1491
1492         /* Position Buffer */
1493         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
1494                                     amdgpu_pos_buf_per_se,
1495                                     256 * 1024);
1496         if (r) {
1497                 dev_err(adev->dev, "Failed to create Position Buffer\n");
1498                 goto err;
1499         }
1500
1501         /* Control Sideband */
1502         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
1503                                     amdgpu_cntl_sb_buf_per_se,
1504                                     256);
1505         if (r) {
1506                 dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
1507                 goto err;
1508         }
1509
1510         /* Parameter Cache, not created by default */
1511         if (amdgpu_param_buf_per_se <= 0)
1512                 goto out;
1513
1514         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
1515                                     amdgpu_param_buf_per_se,
1516                                     512 * 1024);
1517         if (r) {
1518                 dev_err(adev->dev, "Failed to create Parameter Cache\n");
1519                 goto err;
1520         }
1521
1522 out:
1523         adev->gfx.ngg.init = true;
1524         return 0;
1525 err:
1526         gfx_v9_0_ngg_fini(adev);
1527         return r;
1528 }
1529
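/*
 * Enable NGG: program the buffer sizes and base addresses into the WD
 * registers, then clear the GDS reserve area with a DMA_DATA packet
 * submitted on the gfx ring.
 */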
1530 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
1531 {
1532         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
1533         int r;
1534         u32 data, base;
1535
1536         if (!amdgpu_ngg)
1537                 return 0;
1538
1539         /* Program buffer size */
1540         data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
1541                              adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
1542         data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
1543                              adev->gfx.ngg.buf[NGG_POS].size >> 8);
1544         WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
1545
1546         data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
1547                              adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
1548         data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
1549                              adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
1550         WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
1551
1552         /* Program buffer base address */
1553         base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1554         data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
1555         WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
1556
1557         base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1558         data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
1559         WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
1560
1561         base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1562         data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
1563         WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
1564
1565         base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1566         data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
1567         WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
1568
1569         base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1570         data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
1571         WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
1572
1573         base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1574         data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
1575         WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
1576
1577         /* Clear GDS reserved memory */
1578         r = amdgpu_ring_alloc(ring, 17);
1579         if (r) {
1580                 DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
1581                           ring->name, r);
1582                 return r;
1583         }
1584
1585         gfx_v9_0_write_data_to_reg(ring, 0, false,
1586                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
1587                                    (adev->gds.gds_size +
1588                                     adev->gfx.ngg.gds_reserve_size));
1589
1590         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
1591         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
1592                                 PACKET3_DMA_DATA_DST_SEL(1) |
1593                                 PACKET3_DMA_DATA_SRC_SEL(2)));
1594         amdgpu_ring_write(ring, 0);
1595         amdgpu_ring_write(ring, 0);
1596         amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
1597         amdgpu_ring_write(ring, 0);
1598         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
1599                                 adev->gfx.ngg.gds_reserve_size);
1600
1601         gfx_v9_0_write_data_to_reg(ring, 0, false,
1602                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
1603
1604         amdgpu_ring_commit(ring);
1605
1606         return 0;
1607 }
1608
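/*
 * Set up one compute (MEC) ring: map it to an me/pipe/queue slot, assign
 * its doorbell and HPD EOP slice, and hook it to the matching EOP
 * interrupt source.
 */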
1609 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1610                                       int mec, int pipe, int queue)
1611 {
1612         int r;
1613         unsigned irq_type;
1614         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1615
1618         /* mec0 is me1 */
1619         ring->me = mec + 1;
1620         ring->pipe = pipe;
1621         ring->queue = queue;
1622
1623         ring->ring_obj = NULL;
1624         ring->use_doorbell = true;
1625         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
1626         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1627                                 + (ring_id * GFX9_MEC_HPD_SIZE);
1628         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1629
1630         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1631                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1632                 + ring->pipe;
1633
1634         /* type-2 packets are deprecated on MEC, use type-3 instead */
1635         r = amdgpu_ring_init(adev, ring, 1024,
1636                              &adev->gfx.eop_irq, irq_type);
1637         if (r)
1638                 return r;
1639
1641         return 0;
1642 }
1643
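/*
 * sw_init: register the CP interrupt sources, load the gfx microcode,
 * allocate the RLC, MEC and KIQ objects, and create the gfx and compute
 * rings plus their MQDs.
 */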
1644 static int gfx_v9_0_sw_init(void *handle)
1645 {
1646         int i, j, k, r, ring_id;
1647         struct amdgpu_ring *ring;
1648         struct amdgpu_kiq *kiq;
1649         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1650
1651         switch (adev->asic_type) {
1652         case CHIP_VEGA10:
1653         case CHIP_VEGA12:
1654         case CHIP_VEGA20:
1655         case CHIP_RAVEN:
1656                 adev->gfx.mec.num_mec = 2;
1657                 break;
1658         default:
1659                 adev->gfx.mec.num_mec = 1;
1660                 break;
1661         }
1662
1663         adev->gfx.mec.num_pipe_per_mec = 4;
1664         adev->gfx.mec.num_queue_per_pipe = 8;
1665
1666         /* EOP Event */
1667         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
1668         if (r)
1669                 return r;
1670
1671         /* Privileged reg */
1672         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
1673                               &adev->gfx.priv_reg_irq);
1674         if (r)
1675                 return r;
1676
1677         /* Privileged inst */
1678         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
1679                               &adev->gfx.priv_inst_irq);
1680         if (r)
1681                 return r;
1682
1683         /* ECC error */
1684         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
1685                               &adev->gfx.cp_ecc_error_irq);
1686         if (r)
1687                 return r;
1688
1689         /* FUE error */
1690         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
1691                               &adev->gfx.cp_ecc_error_irq);
1692         if (r)
1693                 return r;
1694
1695         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1696
1697         gfx_v9_0_scratch_init(adev);
1698
1699         r = gfx_v9_0_init_microcode(adev);
1700         if (r) {
1701                 DRM_ERROR("Failed to load gfx firmware!\n");
1702                 return r;
1703         }
1704
1705         r = adev->gfx.rlc.funcs->init(adev);
1706         if (r) {
1707                 DRM_ERROR("Failed to init rlc BOs!\n");
1708                 return r;
1709         }
1710
1711         r = gfx_v9_0_mec_init(adev);
1712         if (r) {
1713                 DRM_ERROR("Failed to init MEC BOs!\n");
1714                 return r;
1715         }
1716
1717         /* set up the gfx ring */
1718         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1719                 ring = &adev->gfx.gfx_ring[i];
1720                 ring->ring_obj = NULL;
1721                 if (!i)
1722                         sprintf(ring->name, "gfx");
1723                 else
1724                         sprintf(ring->name, "gfx_%d", i);
1725                 ring->use_doorbell = true;
1726                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
1727                 r = amdgpu_ring_init(adev, ring, 1024,
1728                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
1729                 if (r)
1730                         return r;
1731         }
1732
1733         /* set up the compute queues - allocate horizontally across pipes */
1734         ring_id = 0;
1735         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1736                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1737                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1738                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
1739                                         continue;
1740
1741                                 r = gfx_v9_0_compute_ring_init(adev,
1742                                                                ring_id,
1743                                                                i, k, j);
1744                                 if (r)
1745                                         return r;
1746
1747                                 ring_id++;
1748                         }
1749                 }
1750         }
1751
1752         r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
1753         if (r) {
1754                 DRM_ERROR("Failed to init KIQ BOs!\n");
1755                 return r;
1756         }
1757
1758         kiq = &adev->gfx.kiq;
1759         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
1760         if (r)
1761                 return r;
1762
1763         /* create MQD for all compute queues as well as KIQ for SRIOV case */
1764         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
1765         if (r)
1766                 return r;
1767
1768         adev->gfx.ce_ram_size = 0x8000;
1769
1770         r = gfx_v9_0_gpu_early_init(adev);
1771         if (r)
1772                 return r;
1773
1774         r = gfx_v9_0_ngg_init(adev);
1775         if (r)
1776                 return r;
1777
1778         return 0;
1779 }
1780
1781
1782 static int gfx_v9_0_sw_fini(void *handle)
1783 {
1784         int i;
1785         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1786
1787         if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
1788                         adev->gfx.ras_if) {
1789                 struct ras_common_if *ras_if = adev->gfx.ras_if;
1790                 struct ras_ih_if ih_info = {
1791                         .head = *ras_if,
1792                 };
1793
1794                 amdgpu_ras_debugfs_remove(adev, ras_if);
1795                 amdgpu_ras_sysfs_remove(adev, ras_if);
1796                 amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
1797                 amdgpu_ras_feature_enable(adev, ras_if, 0);
1798                 kfree(ras_if);
1799         }
1800
1801         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1802                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1803         for (i = 0; i < adev->gfx.num_compute_rings; i++)
1804                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1805
1806         amdgpu_gfx_mqd_sw_fini(adev);
1807         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
1808         amdgpu_gfx_kiq_fini(adev);
1809
1810         gfx_v9_0_mec_fini(adev);
1811         gfx_v9_0_ngg_fini(adev);
1812         amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1813         if (adev->asic_type == CHIP_RAVEN) {
1814                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
1815                                 &adev->gfx.rlc.cp_table_gpu_addr,
1816                                 (void **)&adev->gfx.rlc.cp_table_ptr);
1817         }
1818         gfx_v9_0_free_microcode(adev);
1819
1820         return 0;
1821 }
1822
1823
1824 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
1825 {
1826         /* TODO */
1827 }
1828
1829 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
1830 {
1831         u32 data;
1832
1833         if (instance == 0xffffffff)
1834                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
1835         else
1836                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
1837
1838         if (se_num == 0xffffffff)
1839                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
1840         else
1841                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1842
1843         if (sh_num == 0xffffffff)
1844                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
1845         else
1846                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
1847
1848         WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
1849 }
1850
1851 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1852 {
1853         u32 data, mask;
1854
1855         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
1856         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
1857
1858         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1859         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1860
1861         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1862                                          adev->gfx.config.max_sh_per_se);
1863
1864         return (~data) & mask;
1865 }
1866
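/*
 * Build the bitmap of active render backends by reading the RB disable
 * masks for every SE/SH, then cache the mask and the resulting RB count.
 */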
1867 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
1868 {
1869         int i, j;
1870         u32 data;
1871         u32 active_rbs = 0;
1872         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1873                                         adev->gfx.config.max_sh_per_se;
1874
1875         mutex_lock(&adev->grbm_idx_mutex);
1876         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1877                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1878                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1879                         data = gfx_v9_0_get_rb_active_bitmap(adev);
1880                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1881                                                rb_bitmap_width_per_sh);
1882                 }
1883         }
1884         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1885         mutex_unlock(&adev->grbm_idx_mutex);
1886
1887         adev->gfx.config.backend_enable_mask = active_rbs;
1888         adev->gfx.config.num_rbs = hweight32(active_rbs);
1889 }
1890
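/*
 * Fixed SH_MEM setup for the compute VMIDs (8..15): 64-bit address mode,
 * unaligned access, and the aperture bases defined below.
 */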
1891 #define DEFAULT_SH_MEM_BASES    (0x6000)
1892 #define FIRST_COMPUTE_VMID      (8)
1893 #define LAST_COMPUTE_VMID       (16)
1894 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
1895 {
1896         int i;
1897         uint32_t sh_mem_config;
1898         uint32_t sh_mem_bases;
1899
1900         /*
1901          * Configure apertures:
1902          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
1903          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
1904          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
1905          */
1906         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
1907
1908         sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
1909                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
1910                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
1911
1912         mutex_lock(&adev->srbm_mutex);
1913         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1914                 soc15_grbm_select(adev, 0, 0, 0, i);
1915                 /* CP and shaders */
1916                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
1917                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
1918         }
1919         soc15_grbm_select(adev, 0, 0, 0, 0);
1920         mutex_unlock(&adev->srbm_mutex);
1921 }
1922
1923 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
1924 {
1925         u32 tmp;
1926         int i;
1927
1928         WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
1929
1930         gfx_v9_0_tiling_mode_table_init(adev);
1931
1932         gfx_v9_0_setup_rb(adev);
1933         gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
1934         adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
1935
1936         /* XXX SH_MEM regs */
1937         /* where to put LDS, scratch, GPUVM in FSA64 space */
1938         mutex_lock(&adev->srbm_mutex);
1939         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) {
1940                 soc15_grbm_select(adev, 0, 0, 0, i);
1941                 /* CP and shaders */
1942                 if (i == 0) {
1943                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1944                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1945                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
1946                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
1947                 } else {
1948                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1949                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1950                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
1951                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
1952                                 (adev->gmc.private_aperture_start >> 48));
1953                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
1954                                 (adev->gmc.shared_aperture_start >> 48));
1955                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
1956                 }
1957         }
1958         soc15_grbm_select(adev, 0, 0, 0, 0);
1959
1960         mutex_unlock(&adev->srbm_mutex);
1961
1962         gfx_v9_0_init_compute_vmid(adev);
1963 }
1964
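/*
 * Poll until the RLC serdes masters report idle: first the CU masters for
 * every SE/SH, then the non-CU (SE/GC/TC) masters, each wait bounded by
 * adev->usec_timeout.
 */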
1965 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
1966 {
1967         u32 i, j, k;
1968         u32 mask;
1969
1970         mutex_lock(&adev->grbm_idx_mutex);
1971         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1972                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1973                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1974                         for (k = 0; k < adev->usec_timeout; k++) {
1975                                 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
1976                                         break;
1977                                 udelay(1);
1978                         }
1979                         if (k == adev->usec_timeout) {
1980                                 gfx_v9_0_select_se_sh(adev, 0xffffffff,
1981                                                       0xffffffff, 0xffffffff);
1982                                 mutex_unlock(&adev->grbm_idx_mutex);
1983                                 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
1984                                          i, j);
1985                                 return;
1986                         }
1987                 }
1988         }
1989         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1990         mutex_unlock(&adev->grbm_idx_mutex);
1991
1992         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
1993                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
1994                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
1995                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
1996         for (k = 0; k < adev->usec_timeout; k++) {
1997                 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
1998                         break;
1999                 udelay(1);
2000         }
2001 }
2002
2003 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2004                                                bool enable)
2005 {
2006         u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2007
2008         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2009         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2010         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2011         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2012
2013         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2014 }
2015
2016 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2017 {
2018         /* csib */
2019         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2020                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
2021         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2022                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2023         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2024                         adev->gfx.rlc.clear_state_size);
2025 }
2026
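/*
 * Scan the RLC register_list_format blob: record the offset at which each
 * indirect block starts and collect the unique indirect register offsets
 * it references.
 */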
2027 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2028                                 int indirect_offset,
2029                                 int list_size,
2030                                 int *unique_indirect_regs,
2031                                 int unique_indirect_reg_count,
2032                                 int *indirect_start_offsets,
2033                                 int *indirect_start_offsets_count,
2034                                 int max_start_offsets_count)
2035 {
2036         int idx;
2037
2038         for (; indirect_offset < list_size; indirect_offset++) {
2039                 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2040                 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2041                 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2042
2043                 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2044                         indirect_offset += 2;
2045
2046                         /* look for the matching index */
2047                         for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2048                                 if (unique_indirect_regs[idx] ==
2049                                         register_list_format[indirect_offset] ||
2050                                         !unique_indirect_regs[idx])
2051                                         break;
2052                         }
2053
2054                         BUG_ON(idx >= unique_indirect_reg_count);
2055
2056                         if (!unique_indirect_regs[idx])
2057                                 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2058
2059                         indirect_offset++;
2060                 }
2061         }
2062 }
2063
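/*
 * Program the RLC save/restore machinery from the firmware lists: the
 * register restore table goes into SRM ARAM, the direct/indirect register
 * list and block start offsets into GPM scratch, and the unique indirect
 * registers into the SRM index control address/data registers.
 */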
2064 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2065 {
2066         int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2067         int unique_indirect_reg_count = 0;
2068
2069         int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2070         int indirect_start_offsets_count = 0;
2071
2072         int list_size = 0;
2073         int i = 0, j = 0;
2074         u32 tmp = 0;
2075
2076         u32 *register_list_format =
2077                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2078         if (!register_list_format)
2079                 return -ENOMEM;
2080         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
2081                 adev->gfx.rlc.reg_list_format_size_bytes);
2082
2083         /* setup unique_indirect_regs array and indirect_start_offsets array */
2084         unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2085         gfx_v9_1_parse_ind_reg_list(register_list_format,
2086                                     adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2087                                     adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2088                                     unique_indirect_regs,
2089                                     unique_indirect_reg_count,
2090                                     indirect_start_offsets,
2091                                     &indirect_start_offsets_count,
2092                                     ARRAY_SIZE(indirect_start_offsets));
2093
2094         /* enable auto inc in case it is disabled */
2095         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2096         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2097         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2098
2099         /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2100         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2101                 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2102         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2103                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2104                         adev->gfx.rlc.register_restore[i]);
2105
2106         /* load indirect register */
2107         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2108                 adev->gfx.rlc.reg_list_format_start);
2109
2110         /* direct register portion */
2111         for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2112                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2113                         register_list_format[i]);
2114
2115         /* indirect register portion */
2116         while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2117                 if (register_list_format[i] == 0xFFFFFFFF) {
2118                         WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2119                         continue;
2120                 }
2121
2122                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2123                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2124
2125                 for (j = 0; j < unique_indirect_reg_count; j++) {
2126                         if (register_list_format[i] == unique_indirect_regs[j]) {
2127                                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2128                                 break;
2129                         }
2130                 }
2131
2132                 BUG_ON(j >= unique_indirect_reg_count);
2133
2134                 i++;
2135         }
2136
2137         /* set save/restore list size */
2138         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2139         list_size = list_size >> 1;
2140         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2141                 adev->gfx.rlc.reg_restore_list_size);
2142         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2143
2144         /* write the starting offsets to RLC scratch ram */
2145         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2146                 adev->gfx.rlc.starting_offsets_start);
2147         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2148                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2149                        indirect_start_offsets[i]);
2150
2151         /* load unique indirect regs */
2152         for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2153                 if (unique_indirect_regs[i] != 0) {
2154                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2155                                + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2156                                unique_indirect_regs[i] & 0x3FFFF);
2157
2158                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2159                                + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2160                                unique_indirect_regs[i] >> 20);
2161                 }
2162         }
2163
2164         kfree(register_list_format);
2165         return 0;
2166 }
2167
2168 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2169 {
2170         WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2171 }
2172
2173 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2174                                              bool enable)
2175 {
2176         uint32_t data = 0;
2177         uint32_t default_data = 0;
2178
2179         default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2180         if (enable) {
2181                 /* enable GFXIP control over CGPG */
2182                 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2183                 if (default_data != data)
2184                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2185
2186                 /* update status */
2187                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2188                 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2189                 if (default_data != data)
2190                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2191         } else {
2192                 /* restore GFXIP control over CGPG */
2193                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2194                 if (default_data != data)
2195                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2196         }
2197 }
2198
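/*
 * When any of the GFX_PG, GFX_SMG or GFX_DMG power-gating flags is set,
 * program the idle poll count, the RLC power-gating delays and the GRBM
 * register-save idle threshold, then hand CGPG control over to GFXIP.
 */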
2199 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2200 {
2201         uint32_t data = 0;
2202
2203         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2204                               AMD_PG_SUPPORT_GFX_SMG |
2205                               AMD_PG_SUPPORT_GFX_DMG)) {
2206                 /* init IDLE_POLL_COUNT = 60 */
2207                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2208                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2209                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2210                 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2211
2212                 /* init RLC PG Delay */
2213                 data = 0;
2214                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2215                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2216                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2217                 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2218                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2219
2220                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2221                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2222                 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2223                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2224
2225                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2226                 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2227                 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2228                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2229
2230                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2231                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2232
2233                 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2234                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2235                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2236
2237                 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2238         }
2239 }
2240
2241 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2242                                                 bool enable)
2243 {
2244         uint32_t data = 0;
2245         uint32_t default_data = 0;
2246
2247         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2248         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2249                              SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2250                              enable ? 1 : 0);
2251         if (default_data != data)
2252                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2253 }
2254
2255 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2256                                                 bool enable)
2257 {
2258         uint32_t data = 0;
2259         uint32_t default_data = 0;
2260
2261         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2262         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2263                              SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2264                              enable ? 1 : 0);
2265         if (default_data != data)
2266                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2267 }
2268
2269 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2270                                         bool enable)
2271 {
2272         uint32_t data = 0;
2273         uint32_t default_data = 0;
2274
2275         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2276         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2277                              CP_PG_DISABLE,
2278                              enable ? 0 : 1);
2279         if (default_data != data)
2280                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2281 }
2282
2283 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2284                                                 bool enable)
2285 {
2286         uint32_t data, default_data;
2287
2288         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2289         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2290                              GFX_POWER_GATING_ENABLE,
2291                              enable ? 1 : 0);
2292         if (default_data != data)
2293                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2294 }
2295
2296 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2297                                                 bool enable)
2298 {
2299         uint32_t data, default_data;
2300
2301         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2302         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2303                              GFX_PIPELINE_PG_ENABLE,
2304                              enable ? 1 : 0);
2305         if (default_data != data)
2306                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2307
2308         if (!enable)
2309                 /* read any GFX register to wake up GFX */
2310                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2311 }
2312
2313 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2314                                                        bool enable)
2315 {
2316         uint32_t data, default_data;
2317
2318         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2319         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2320                              STATIC_PER_CU_PG_ENABLE,
2321                              enable ? 1 : 0);
2322         if (default_data != data)
2323                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2324 }
2325
2326 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2327                                                 bool enable)
2328 {
2329         uint32_t data, default_data;
2330
2331         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2332         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2333                              DYN_PER_CU_PG_ENABLE,
2334                              enable ? 1 : 0);
2335         if (default_data != data)
2336                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2337 }
2338
2339 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2340 {
2341         gfx_v9_0_init_csb(adev);
2342
2343         /*
2344          * The RLC save/restore list is only supported from RLC v2.1
2345          * firmware onward, and it is required by the gfxoff feature.
2346          */
2347         if (adev->gfx.rlc.is_rlc_v2_1) {
2348                 gfx_v9_1_init_rlc_save_restore_list(adev);
2349                 gfx_v9_0_enable_save_restore_machine(adev);
2350         }
2351
2352         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2353                               AMD_PG_SUPPORT_GFX_SMG |
2354                               AMD_PG_SUPPORT_GFX_DMG |
2355                               AMD_PG_SUPPORT_CP |
2356                               AMD_PG_SUPPORT_GDS |
2357                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
2358                 WREG32(mmRLC_JUMP_TABLE_RESTORE,
2359                        adev->gfx.rlc.cp_table_gpu_addr >> 8);
2360                 gfx_v9_0_init_gfx_power_gating(adev);
2361         }
2362 }
2363
2364 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2365 {
2366         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2367         gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2368         gfx_v9_0_wait_for_rlc_serdes(adev);
2369 }
2370
2371 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2372 {
2373         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2374         udelay(50);
2375         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2376         udelay(50);
2377 }
2378
2379 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2380 {
2381 #ifdef AMDGPU_RLC_DEBUG_RETRY
2382         u32 rlc_ucode_ver;
2383 #endif
2384
2385         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2386         udelay(50);
2387
2388         /* APUs enable the CP interrupt only after the CP has been initialized */
2389         if (!(adev->flags & AMD_IS_APU)) {
2390                 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2391                 udelay(50);
2392         }
2393
2394 #ifdef AMDGPU_RLC_DEBUG_RETRY
2395         /* RLC_GPM_GENERAL_6 : RLC Ucode version */
2396         rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2397         if (rlc_ucode_ver == 0x108) {
2398                 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2399                                 rlc_ucode_ver, adev->gfx.rlc_fw_version);
2400                 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2401                  * default is 0x9C4 to create a 100us interval */
2402                 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2403                 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2404                  * to disable the page fault retry interrupts, default is
2405                  * 0x100 (256) */
2406                 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2407         }
2408 #endif
2409 }
2410
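/* Legacy (non-PSP) RLC microcode load: stream the ucode words into
 * RLC_GPM_UCODE_DATA starting at the RLCG load address, then write the
 * firmware version to RLC_GPM_UCODE_ADDR.
 */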
2411 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2412 {
2413         const struct rlc_firmware_header_v2_0 *hdr;
2414         const __le32 *fw_data;
2415         unsigned i, fw_size;
2416
2417         if (!adev->gfx.rlc_fw)
2418                 return -EINVAL;
2419
2420         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2421         amdgpu_ucode_print_rlc_hdr(&hdr->header);
2422
2423         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2424                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2425         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2426
2427         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2428                         RLCG_UCODE_LOADING_START_ADDRESS);
2429         for (i = 0; i < fw_size; i++)
2430                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2431         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2432
2433         return 0;
2434 }
2435
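/* RLC bring-up: stop the RLC, disable CGCG/CGLS, initialize power
 * gating, load the RLC ucode when firmware is not handled by the PSP,
 * tune LBPW per ASIC and restart the RLC. Under SRIOV only the CSB is
 * initialized.
 */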
2436 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2437 {
2438         int r;
2439
2440         if (amdgpu_sriov_vf(adev)) {
2441                 gfx_v9_0_init_csb(adev);
2442                 return 0;
2443         }
2444
2445         adev->gfx.rlc.funcs->stop(adev);
2446
2447         /* disable CG */
2448         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2449
2450         gfx_v9_0_init_pg(adev);
2451
2452         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2453                 /* legacy rlc firmware loading */
2454                 r = gfx_v9_0_rlc_load_microcode(adev);
2455                 if (r)
2456                         return r;
2457         }
2458
2459         switch (adev->asic_type) {
2460         case CHIP_RAVEN:
2461                 if (amdgpu_lbpw == 0)
2462                         gfx_v9_0_enable_lbpw(adev, false);
2463                 else
2464                         gfx_v9_0_enable_lbpw(adev, true);
2465                 break;
2466         case CHIP_VEGA20:
2467                 if (amdgpu_lbpw > 0)
2468                         gfx_v9_0_enable_lbpw(adev, true);
2469                 else
2470                         gfx_v9_0_enable_lbpw(adev, false);
2471                 break;
2472         default:
2473                 break;
2474         }
2475
2476         adev->gfx.rlc.funcs->start(adev);
2477
2478         return 0;
2479 }
2480
2481 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2482 {
2483         int i;
2484         u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2485
2486         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2487         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2488         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2489         if (!enable) {
2490                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2491                         adev->gfx.gfx_ring[i].sched.ready = false;
2492         }
2493         WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2494         udelay(50);
2495 }
2496
2497 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2498 {
2499         const struct gfx_firmware_header_v1_0 *pfp_hdr;
2500         const struct gfx_firmware_header_v1_0 *ce_hdr;
2501         const struct gfx_firmware_header_v1_0 *me_hdr;
2502         const __le32 *fw_data;
2503         unsigned i, fw_size;
2504
2505         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2506                 return -EINVAL;
2507
2508         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2509                 adev->gfx.pfp_fw->data;
2510         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2511                 adev->gfx.ce_fw->data;
2512         me_hdr = (const struct gfx_firmware_header_v1_0 *)
2513                 adev->gfx.me_fw->data;
2514
2515         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2516         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2517         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2518
2519         gfx_v9_0_cp_gfx_enable(adev, false);
2520
2521         /* PFP */
2522         fw_data = (const __le32 *)
2523                 (adev->gfx.pfp_fw->data +
2524                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2525         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2526         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
2527         for (i = 0; i < fw_size; i++)
2528                 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2529         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2530
2531         /* CE */
2532         fw_data = (const __le32 *)
2533                 (adev->gfx.ce_fw->data +
2534                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2535         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2536         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
2537         for (i = 0; i < fw_size; i++)
2538                 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2539         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2540
2541         /* ME */
2542         fw_data = (const __le32 *)
2543                 (adev->gfx.me_fw->data +
2544                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2545         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2546         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
2547         for (i = 0; i < fw_size; i++)
2548                 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2549         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2550
2551         return 0;
2552 }
2553
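/* Emit the clear-state preamble on the gfx ring: context control, the
 * SET_CONTEXT_REG sequence generated from gfx9_cs_data, the CE
 * partition bases and the VGT index type.
 */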
2554 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
2555 {
2556         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2557         const struct cs_section_def *sect = NULL;
2558         const struct cs_extent_def *ext = NULL;
2559         int r, i, tmp;
2560
2561         /* init the CP */
2562         WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2563         WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
2564
2565         gfx_v9_0_cp_gfx_enable(adev, true);
2566
2567         r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
2568         if (r) {
2569                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2570                 return r;
2571         }
2572
2573         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2574         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2575
2576         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2577         amdgpu_ring_write(ring, 0x80000000);
2578         amdgpu_ring_write(ring, 0x80000000);
2579
2580         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
2581                 for (ext = sect->section; ext->extent != NULL; ++ext) {
2582                         if (sect->id == SECT_CONTEXT) {
2583                                 amdgpu_ring_write(ring,
2584                                        PACKET3(PACKET3_SET_CONTEXT_REG,
2585                                                ext->reg_count));
2586                                 amdgpu_ring_write(ring,
2587                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2588                                 for (i = 0; i < ext->reg_count; i++)
2589                                         amdgpu_ring_write(ring, ext->extent[i]);
2590                         }
2591                 }
2592         }
2593
2594         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2595         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2596
2597         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2598         amdgpu_ring_write(ring, 0);
2599
2600         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2601         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2602         amdgpu_ring_write(ring, 0x8000);
2603         amdgpu_ring_write(ring, 0x8000);
2604
2605         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2606         tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
2607                 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
2608         amdgpu_ring_write(ring, tmp);
2609         amdgpu_ring_write(ring, 0);
2610
2611         amdgpu_ring_commit(ring);
2612
2613         return 0;
2614 }
2615
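/* Program the CP_RB0 ring buffer (size, read/write pointers, base
 * address, doorbell range) from the first gfx ring and start it via
 * gfx_v9_0_cp_gfx_start().
 */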
2616 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
2617 {
2618         struct amdgpu_ring *ring;
2619         u32 tmp;
2620         u32 rb_bufsz;
2621         u64 rb_addr, rptr_addr, wptr_gpu_addr;
2622
2623         /* Set the write pointer delay */
2624         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
2625
2626         /* set the RB to use vmid 0 */
2627         WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
2628
2629         /* Set ring buffer size */
2630         ring = &adev->gfx.gfx_ring[0];
2631         rb_bufsz = order_base_2(ring->ring_size / 8);
2632         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
2633         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
2634 #ifdef __BIG_ENDIAN
2635         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
2636 #endif
2637         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2638
2639         /* Initialize the ring buffer's write pointers */
2640         ring->wptr = 0;
2641         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
2642         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
2643
2644         /* set the wb address whether it's enabled or not */
2645         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2646         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
2647         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
2648
2649         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2650         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
2651         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
2652
2653         mdelay(1);
2654         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2655
2656         rb_addr = ring->gpu_addr >> 8;
2657         WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
2658         WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
2659
2660         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
2661         if (ring->use_doorbell) {
2662                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2663                                     DOORBELL_OFFSET, ring->doorbell_index);
2664                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2665                                     DOORBELL_EN, 1);
2666         } else {
2667                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
2668         }
2669         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
2670
2671         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
2672                         DOORBELL_RANGE_LOWER, ring->doorbell_index);
2673         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
2674
2675         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
2676                        CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
2677
2678
2679         /* start the ring */
2680         gfx_v9_0_cp_gfx_start(adev);
2681         ring->sched.ready = true;
2682
2683         return 0;
2684 }
2685
2686 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
2687 {
2688         int i;
2689
2690         if (enable) {
2691                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
2692         } else {
2693                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
2694                         (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
2695                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2696                         adev->gfx.compute_ring[i].sched.ready = false;
2697                 adev->gfx.kiq.ring.sched.ready = false;
2698         }
2699         udelay(50);
2700 }
2701
2702 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
2703 {
2704         const struct gfx_firmware_header_v1_0 *mec_hdr;
2705         const __le32 *fw_data;
2706         unsigned i;
2707         u32 tmp;
2708
2709         if (!adev->gfx.mec_fw)
2710                 return -EINVAL;
2711
2712         gfx_v9_0_cp_compute_enable(adev, false);
2713
2714         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2715         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
2716
2717         fw_data = (const __le32 *)
2718                 (adev->gfx.mec_fw->data +
2719                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2720         tmp = 0;
2721         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2722         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2723         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
2724
2725         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
2726                 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
2727         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
2728                 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
2729
2730         /* MEC1 */
2731         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2732                          mec_hdr->jt_offset);
2733         for (i = 0; i < mec_hdr->jt_size; i++)
2734                 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
2735                         le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
2736
2737         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2738                         adev->gfx.mec_fw_version);
2739         /* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
2740
2741         return 0;
2742 }
2743
2744 /* KIQ functions */
2745 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
2746 {
2747         uint32_t tmp;
2748         struct amdgpu_device *adev = ring->adev;
2749
2750         /* tell RLC which is KIQ queue */
2751         tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
2752         tmp &= 0xffffff00;
2753         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
2754         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2755         tmp |= 0x80;
2756         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2757 }
2758
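/* Map all enabled compute queues through the KIQ: one SET_RESOURCES
 * packet carrying the queue mask, followed by a MAP_QUEUES packet per
 * KCQ, then a KIQ ring test to confirm the packets were processed.
 */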
2759 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
2760 {
2761         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
2762         uint64_t queue_mask = 0;
2763         int r, i;
2764
2765         for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
2766                 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
2767                         continue;
2768
2769                 /* This situation may be hit in the future if a new HW
2770                  * generation exposes more than 64 queues. If so, the
2771                  * definition of queue_mask needs updating */
2772                 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
2773                         DRM_ERROR("Invalid KCQ enabled: %d\n", i);
2774                         break;
2775                 }
2776
2777                 queue_mask |= (1ull << i);
2778         }
2779
2780         r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
2781         if (r) {
2782                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
2783                 return r;
2784         }
2785
2786         /* set resources */
2787         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
2788         amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
2789                           PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
2790         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
2791         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
2792         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
2793         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
2794         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
2795         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
2796         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2797                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
2798                 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
2799                 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2800
2801                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
2802                 /* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
2803                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
2804                                   PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
2805                                   PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
2806                                   PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
2807                                   PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
2808                                   PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
2809                                   PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
2810                                   PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
2811                                   PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
2812                                   PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
2813                 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
2814                 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
2815                 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
2816                 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
2817                 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
2818         }
2819
2820         r = amdgpu_ring_test_helper(kiq_ring);
2821         if (r)
2822                 DRM_ERROR("KCQ enable failed\n");
2823
2824         return r;
2825 }
2826
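/* Fill in the memory queue descriptor (MQD) for a compute queue: EOP
 * buffer address and size, doorbell control, MQD/HQD base addresses,
 * rptr/wptr report addresses and the CP_HQD_* control values that will
 * later be written into the HQD registers.
 */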
2827 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
2828 {
2829         struct amdgpu_device *adev = ring->adev;
2830         struct v9_mqd *mqd = ring->mqd_ptr;
2831         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
2832         uint32_t tmp;
2833
2834         mqd->header = 0xC0310800;
2835         mqd->compute_pipelinestat_enable = 0x00000001;
2836         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
2837         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
2838         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
2839         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
2840         mqd->compute_misc_reserved = 0x00000003;
2841
2842         mqd->dynamic_cu_mask_addr_lo =
2843                 lower_32_bits(ring->mqd_gpu_addr
2844                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2845         mqd->dynamic_cu_mask_addr_hi =
2846                 upper_32_bits(ring->mqd_gpu_addr
2847                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2848
2849         eop_base_addr = ring->eop_gpu_addr >> 8;
2850         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
2851         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
2852
2853         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2854         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
2855         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
2856                         (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
2857
2858         mqd->cp_hqd_eop_control = tmp;
2859
2860         /* enable doorbell? */
2861         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2862
2863         if (ring->use_doorbell) {
2864                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2865                                     DOORBELL_OFFSET, ring->doorbell_index);
2866                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2867                                     DOORBELL_EN, 1);
2868                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2869                                     DOORBELL_SOURCE, 0);
2870                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2871                                     DOORBELL_HIT, 0);
2872         } else {
2873                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2874                                          DOORBELL_EN, 0);
2875         }
2876
2877         mqd->cp_hqd_pq_doorbell_control = tmp;
2878
2879         /* disable the queue if it's active */
2880         ring->wptr = 0;
2881         mqd->cp_hqd_dequeue_request = 0;
2882         mqd->cp_hqd_pq_rptr = 0;
2883         mqd->cp_hqd_pq_wptr_lo = 0;
2884         mqd->cp_hqd_pq_wptr_hi = 0;
2885
2886         /* set the pointer to the MQD */
2887         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
2888         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
2889
2890         /* set MQD vmid to 0 */
2891         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
2892         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
2893         mqd->cp_mqd_control = tmp;
2894
2895         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
2896         hqd_gpu_addr = ring->gpu_addr >> 8;
2897         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
2898         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
2899
2900         /* set up the HQD, this is similar to CP_RB0_CNTL */
2901         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
2902         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
2903                             (order_base_2(ring->ring_size / 4) - 1));
2904         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
2905                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
2906 #ifdef __BIG_ENDIAN
2907         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
2908 #endif
2909         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
2910         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
2911         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
2912         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
2913         mqd->cp_hqd_pq_control = tmp;
2914
2915         /* set the wb address whether it's enabled or not */
2916         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2917         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
2918         mqd->cp_hqd_pq_rptr_report_addr_hi =
2919                 upper_32_bits(wb_gpu_addr) & 0xffff;
2920
2921         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
2922         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2923         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
2924         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
2925
2926         tmp = 0;
2927         /* enable the doorbell if requested */
2928         if (ring->use_doorbell) {
2929                 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2930                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2931                                 DOORBELL_OFFSET, ring->doorbell_index);
2932
2933                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2934                                          DOORBELL_EN, 1);
2935                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2936                                          DOORBELL_SOURCE, 0);
2937                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2938                                          DOORBELL_HIT, 0);
2939         }
2940
2941         mqd->cp_hqd_pq_doorbell_control = tmp;
2942
2943         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2944         ring->wptr = 0;
2945         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
2946
2947         /* set the vmid for the queue */
2948         mqd->cp_hqd_vmid = 0;
2949
2950         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
2951         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
2952         mqd->cp_hqd_persistent_state = tmp;
2953
2954         /* set MIN_IB_AVAIL_SIZE */
2955         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
2956         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
2957         mqd->cp_hqd_ib_control = tmp;
2958
2959         /* activate the queue */
2960         mqd->cp_hqd_active = 1;
2961
2962         return 0;
2963 }
2964
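/* Load the prepared MQD values into the HQD registers for the KIQ;
 * called with srbm_mutex held and the target me/pipe/queue selected via
 * soc15_grbm_select().
 */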
2965 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
2966 {
2967         struct amdgpu_device *adev = ring->adev;
2968         struct v9_mqd *mqd = ring->mqd_ptr;
2969         int j;
2970
2971         /* disable wptr polling */
2972         WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
2973
2974         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
2975                mqd->cp_hqd_eop_base_addr_lo);
2976         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
2977                mqd->cp_hqd_eop_base_addr_hi);
2978
2979         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2980         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
2981                mqd->cp_hqd_eop_control);
2982
2983         /* enable doorbell? */
2984         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
2985                mqd->cp_hqd_pq_doorbell_control);
2986
2987         /* disable the queue if it's active */
2988         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
2989                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
2990                 for (j = 0; j < adev->usec_timeout; j++) {
2991                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
2992                                 break;
2993                         udelay(1);
2994                 }
2995                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
2996                        mqd->cp_hqd_dequeue_request);
2997                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
2998                        mqd->cp_hqd_pq_rptr);
2999                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3000                        mqd->cp_hqd_pq_wptr_lo);
3001                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3002                        mqd->cp_hqd_pq_wptr_hi);
3003         }
3004
3005         /* set the pointer to the MQD */
3006         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3007                mqd->cp_mqd_base_addr_lo);
3008         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3009                mqd->cp_mqd_base_addr_hi);
3010
3011         /* set MQD vmid to 0 */
3012         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3013                mqd->cp_mqd_control);
3014
3015         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3016         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3017                mqd->cp_hqd_pq_base_lo);
3018         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3019                mqd->cp_hqd_pq_base_hi);
3020
3021         /* set up the HQD, this is similar to CP_RB0_CNTL */
3022         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3023                mqd->cp_hqd_pq_control);
3024
3025         /* set the wb address whether it's enabled or not */
3026         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3027                                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3028         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3029                                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3030
3031         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3032         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3033                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3034         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3035                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3036
3037         /* enable the doorbell if requested */
3038         if (ring->use_doorbell) {
3039                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3040                                         (adev->doorbell_index.kiq * 2) << 2);
3041                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3042                                         (adev->doorbell_index.userqueue_end * 2) << 2);
3043         }
3044
3045         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3046                mqd->cp_hqd_pq_doorbell_control);
3047
3048         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3049         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3050                mqd->cp_hqd_pq_wptr_lo);
3051         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3052                mqd->cp_hqd_pq_wptr_hi);
3053
3054         /* set the vmid for the queue */
3055         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3056
3057         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3058                mqd->cp_hqd_persistent_state);
3059
3060         /* activate the queue */
3061         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3062                mqd->cp_hqd_active);
3063
3064         if (ring->use_doorbell)
3065                 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3066
3067         return 0;
3068 }
3069
3070 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3071 {
3072         struct amdgpu_device *adev = ring->adev;
3073         int j;
3074
3075         /* disable the queue if it's active */
3076         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3077
3078                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3079
3080                 for (j = 0; j < adev->usec_timeout; j++) {
3081                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3082                                 break;
3083                         udelay(1);
3084                 }
3085
3086                 if (j == adev->usec_timeout) {
3087                         DRM_DEBUG("KIQ dequeue request failed.\n");
3088
3089                         /* manually disable the queue if the dequeue request timed out */
3090                         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3091                 }
3092
3093                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3094                       0);
3095         }
3096
3097         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3098         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3099         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3100         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3101         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3102         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3103         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3104         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3105
3106         return 0;
3107 }
3108
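/* Set up the KIQ queue: on GPU reset restore the MQD from the backup
 * copy and reprogram the HQD registers, otherwise build a fresh MQD,
 * program the registers and save a backup for later resets.
 */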
3109 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3110 {
3111         struct amdgpu_device *adev = ring->adev;
3112         struct v9_mqd *mqd = ring->mqd_ptr;
3113         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3114
3115         gfx_v9_0_kiq_setting(ring);
3116
3117         if (adev->in_gpu_reset) { /* for GPU_RESET case */
3118                 /* reset MQD to a clean status */
3119                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3120                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3121
3122                 /* reset ring buffer */
3123                 ring->wptr = 0;
3124                 amdgpu_ring_clear_ring(ring);
3125
3126                 mutex_lock(&adev->srbm_mutex);
3127                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3128                 gfx_v9_0_kiq_init_register(ring);
3129                 soc15_grbm_select(adev, 0, 0, 0, 0);
3130                 mutex_unlock(&adev->srbm_mutex);
3131         } else {
3132                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3133                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3134                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3135                 mutex_lock(&adev->srbm_mutex);
3136                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3137                 gfx_v9_0_mqd_init(ring);
3138                 gfx_v9_0_kiq_init_register(ring);
3139                 soc15_grbm_select(adev, 0, 0, 0, 0);
3140                 mutex_unlock(&adev->srbm_mutex);
3141
3142                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3143                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3144         }
3145
3146         return 0;
3147 }
3148
3149 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3150 {
3151         struct amdgpu_device *adev = ring->adev;
3152         struct v9_mqd *mqd = ring->mqd_ptr;
3153         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3154
3155         if (!adev->in_gpu_reset && !adev->in_suspend) {
3156                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3157                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3158                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3159                 mutex_lock(&adev->srbm_mutex);
3160                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3161                 gfx_v9_0_mqd_init(ring);
3162                 soc15_grbm_select(adev, 0, 0, 0, 0);
3163                 mutex_unlock(&adev->srbm_mutex);
3164
3165                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3166                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3167         } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3168                 /* reset MQD to a clean status */
3169                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3170                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3171
3172                 /* reset ring buffer */
3173                 ring->wptr = 0;
3174                 amdgpu_ring_clear_ring(ring);
3175         } else {
3176                 amdgpu_ring_clear_ring(ring);
3177         }
3178
3179         return 0;
3180 }
3181
3182 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3183 {
3184         struct amdgpu_ring *ring;
3185         int r;
3186
3187         ring = &adev->gfx.kiq.ring;
3188
3189         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3190         if (unlikely(r != 0))
3191                 return r;
3192
3193         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3194         if (unlikely(r != 0))
3195                 return r;
3196
3197         gfx_v9_0_kiq_init_queue(ring);
3198         amdgpu_bo_kunmap(ring->mqd_obj);
3199         ring->mqd_ptr = NULL;
3200         amdgpu_bo_unreserve(ring->mqd_obj);
3201         ring->sched.ready = true;
3202         return 0;
3203 }
3204
3205 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3206 {
3207         struct amdgpu_ring *ring = NULL;
3208         int r = 0, i;
3209
3210         gfx_v9_0_cp_compute_enable(adev, true);
3211
3212         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3213                 ring = &adev->gfx.compute_ring[i];
3214
3215                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3216                 if (unlikely(r != 0))
3217                         goto done;
3218                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3219                 if (!r) {
3220                         r = gfx_v9_0_kcq_init_queue(ring);
3221                         amdgpu_bo_kunmap(ring->mqd_obj);
3222                         ring->mqd_ptr = NULL;
3223                 }
3224                 amdgpu_bo_unreserve(ring->mqd_obj);
3225                 if (r)
3226                         goto done;
3227         }
3228
3229         r = gfx_v9_0_kiq_kcq_enable(adev);
3230 done:
3231         return r;
3232 }
3233
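/* Full CP bring-up: load the gfx and compute microcode on the legacy
 * (non-PSP) path, resume the KIQ, gfx and compute queues, ring-test
 * each ring and re-enable the GUI idle interrupt.
 */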
3234 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3235 {
3236         int r, i;
3237         struct amdgpu_ring *ring;
3238
3239         if (!(adev->flags & AMD_IS_APU))
3240                 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3241
3242         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3243                 /* legacy firmware loading */
3244                 r = gfx_v9_0_cp_gfx_load_microcode(adev);
3245                 if (r)
3246                         return r;
3247
3248                 r = gfx_v9_0_cp_compute_load_microcode(adev);
3249                 if (r)
3250                         return r;
3251         }
3252
3253         r = gfx_v9_0_kiq_resume(adev);
3254         if (r)
3255                 return r;
3256
3257         r = gfx_v9_0_cp_gfx_resume(adev);
3258         if (r)
3259                 return r;
3260
3261         r = gfx_v9_0_kcq_resume(adev);
3262         if (r)
3263                 return r;
3264
3265         ring = &adev->gfx.gfx_ring[0];
3266         r = amdgpu_ring_test_helper(ring);
3267         if (r)
3268                 return r;
3269
3270         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3271                 ring = &adev->gfx.compute_ring[i];
3272                 amdgpu_ring_test_helper(ring);
3273         }
3274
3275         gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3276
3277         return 0;
3278 }
3279
3280 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3281 {
3282         gfx_v9_0_cp_gfx_enable(adev, enable);
3283         gfx_v9_0_cp_compute_enable(adev, enable);
3284 }
3285
3286 static int gfx_v9_0_hw_init(void *handle)
3287 {
3288         int r;
3289         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3290
3291         gfx_v9_0_init_golden_registers(adev);
3292
3293         gfx_v9_0_constants_init(adev);
3294
3295         r = gfx_v9_0_csb_vram_pin(adev);
3296         if (r)
3297                 return r;
3298
3299         r = adev->gfx.rlc.funcs->resume(adev);
3300         if (r)
3301                 return r;
3302
3303         r = gfx_v9_0_cp_resume(adev);
3304         if (r)
3305                 return r;
3306
3307         r = gfx_v9_0_ngg_en(adev);
3308         if (r)
3309                 return r;
3310
3311         return r;
3312 }
3313
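/* Unmap every compute queue through the KIQ with UNMAP_QUEUES
 * (RESET_QUEUES action), then ring-test the KIQ to make sure the
 * packets were consumed.
 */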
3314 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3315 {
3316         int r, i;
3317         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3318
3319         r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3320         if (r)
3321                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3322
3323         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3324                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3325
3326                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3327                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3328                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3329                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3330                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3331                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3332                 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3333                 amdgpu_ring_write(kiq_ring, 0);
3334                 amdgpu_ring_write(kiq_ring, 0);
3335                 amdgpu_ring_write(kiq_ring, 0);
3336         }
3337         r = amdgpu_ring_test_helper(kiq_ring);
3338         if (r)
3339                 DRM_ERROR("KCQ disable failed\n");
3340
3341         return r;
3342 }
3343
3344 static int gfx_v9_0_hw_fini(void *handle)
3345 {
3346         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3347
3348         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3349         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3350         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3351
3352         /* disable the KCQs so the CPC stops touching memory that is about to become invalid */
3353         gfx_v9_0_kcq_disable(adev);
3354
3355         if (amdgpu_sriov_vf(adev)) {
3356                 gfx_v9_0_cp_gfx_enable(adev, false);
3357                 /* must disable wptr polling for SRIOV when the hw is finished,
3358                  * otherwise the CPC engine may keep fetching a WB address that is
3359                  * no longer valid after the sw teardown and trigger a DMAR read
3360                  * error on the hypervisor side.
3361                  */
3362                 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3363                 return 0;
3364         }
3365
3366         /* Use deinitialize sequence from CAIL when unbinding device from driver,
3367          * otherwise the KIQ hangs when the device is bound back
3368          */
3369         if (!adev->in_gpu_reset && !adev->in_suspend) {
3370                 mutex_lock(&adev->srbm_mutex);
3371                 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3372                                 adev->gfx.kiq.ring.pipe,
3373                                 adev->gfx.kiq.ring.queue, 0);
3374                 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3375                 soc15_grbm_select(adev, 0, 0, 0, 0);
3376                 mutex_unlock(&adev->srbm_mutex);
3377         }
3378
3379         gfx_v9_0_cp_enable(adev, false);
3380         adev->gfx.rlc.funcs->stop(adev);
3381
3382         gfx_v9_0_csb_vram_unpin(adev);
3383
3384         return 0;
3385 }
3386
3387 static int gfx_v9_0_suspend(void *handle)
3388 {
3389         return gfx_v9_0_hw_fini(handle);
3390 }
3391
3392 static int gfx_v9_0_resume(void *handle)
3393 {
3394         return gfx_v9_0_hw_init(handle);
3395 }
3396
3397 static bool gfx_v9_0_is_idle(void *handle)
3398 {
3399         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3400
3401         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3402                                 GRBM_STATUS, GUI_ACTIVE))
3403                 return false;
3404         else
3405                 return true;
3406 }
3407
3408 static int gfx_v9_0_wait_for_idle(void *handle)
3409 {
3410         unsigned i;
3411         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3412
3413         for (i = 0; i < adev->usec_timeout; i++) {
3414                 if (gfx_v9_0_is_idle(handle))
3415                         return 0;
3416                 udelay(1);
3417         }
3418         return -ETIMEDOUT;
3419 }
3420
3421 static int gfx_v9_0_soft_reset(void *handle)
3422 {
3423         u32 grbm_soft_reset = 0;
3424         u32 tmp;
3425         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3426
3427         /* GRBM_STATUS */
3428         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3429         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3430                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3431                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3432                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3433                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3434                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3435                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3436                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3437                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3438                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3439         }
3440
3441         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3442                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3443                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3444         }
3445
3446         /* GRBM_STATUS2 */
3447         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3448         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3449                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3450                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3451
3452
3453         if (grbm_soft_reset) {
3454                 /* stop the rlc */
3455                 adev->gfx.rlc.funcs->stop(adev);
3456
3457                 /* Disable GFX parsing/prefetching */
3458                 gfx_v9_0_cp_gfx_enable(adev, false);
3459
3460                 /* Disable MEC parsing/prefetching */
3461                 gfx_v9_0_cp_compute_enable(adev, false);
3462
3463                 if (grbm_soft_reset) {
3464                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3465                         tmp |= grbm_soft_reset;
3466                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3467                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3468                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3469
3470                         udelay(50);
3471
3472                         tmp &= ~grbm_soft_reset;
3473                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3474                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3475                 }
3476
3477                 /* Wait a little for things to settle down */
3478                 udelay(50);
3479         }
3480         return 0;
3481 }
3482
3483 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3484 {
3485         uint64_t clock;
3486
3487         mutex_lock(&adev->gfx.gpu_clock_mutex);
3488         WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
3489         clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
3490                 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3491         mutex_unlock(&adev->gfx.gpu_clock_mutex);
3492         return clock;
3493 }
3494
3495 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3496                                           uint32_t vmid,
3497                                           uint32_t gds_base, uint32_t gds_size,
3498                                           uint32_t gws_base, uint32_t gws_size,
3499                                           uint32_t oa_base, uint32_t oa_size)
3500 {
3501         struct amdgpu_device *adev = ring->adev;
3502
3503         /* GDS Base */
3504         gfx_v9_0_write_data_to_reg(ring, 0, false,
3505                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
3506                                    gds_base);
3507
3508         /* GDS Size */
3509         gfx_v9_0_write_data_to_reg(ring, 0, false,
3510                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
3511                                    gds_size);
3512
3513         /* GWS */
3514         gfx_v9_0_write_data_to_reg(ring, 0, false,
3515                                    SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
3516                                    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
3517
3518         /* OA */
3519         gfx_v9_0_write_data_to_reg(ring, 0, false,
3520                                    SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
3521                                    (1 << (oa_size + oa_base)) - (1 << oa_base));
3522 }
3523
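/* Hand-assembled GCN compute shaders used by the EDC/RAS workarounds
 * below; they write every VGPR and SGPR so the register files start
 * from a known, ECC-clean state.
 */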
3524 static const u32 vgpr_init_compute_shader[] =
3525 {
3526         0xb07c0000, 0xbe8000ff,
3527         0x000000f8, 0xbf110800,
3528         0x7e000280, 0x7e020280,
3529         0x7e040280, 0x7e060280,
3530         0x7e080280, 0x7e0a0280,
3531         0x7e0c0280, 0x7e0e0280,
3532         0x80808800, 0xbe803200,
3533         0xbf84fff5, 0xbf9c0000,
3534         0xd28c0001, 0x0001007f,
3535         0xd28d0001, 0x0002027e,
3536         0x10020288, 0xb8810904,
3537         0xb7814000, 0xd1196a01,
3538         0x00000301, 0xbe800087,
3539         0xbefc00c1, 0xd89c4000,
3540         0x00020201, 0xd89cc080,
3541         0x00040401, 0x320202ff,
3542         0x00000800, 0x80808100,
3543         0xbf84fff8, 0x7e020280,
3544         0xbf810000, 0x00000000,
3545 };
3546
3547 static const u32 sgpr_init_compute_shader[] =
3548 {
3549         0xb07c0000, 0xbe8000ff,
3550         0x0000005f, 0xbee50080,
3551         0xbe812c65, 0xbe822c65,
3552         0xbe832c65, 0xbe842c65,
3553         0xbe852c65, 0xb77c0005,
3554         0x80808500, 0xbf84fff8,
3555         0xbe800080, 0xbf810000,
3556 };
3557
3558 static const struct soc15_reg_entry vgpr_init_regs[] = {
3559    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3560    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3561    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3562    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3563    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3564    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3565    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3566    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3567    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
3568    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
3569 };
3570
3571 static const struct soc15_reg_entry sgpr_init_regs[] = {
3572    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3573    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3574    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3575    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3576    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3577    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3578    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3579    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3580    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
3581    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
3582 };
3583
3584 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
3585    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
3586    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
3587    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
3588    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
3589    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
3590    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
3591    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
3592    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
3593    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
3594    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
3595    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
3596    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
3597    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
3598    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
3599    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
3600    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
3601    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
3602    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
3603    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
3604    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
3605    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
3606    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
3607    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
3608    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
3609    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
3610    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
3611    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
3612    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
3613    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
3614    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
3615    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
3616    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
3617 };
3618
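/* GDS EDC workaround: issue a CP DMA_DATA covering the whole VMID0 GDS
 * partition so its contents (and ECC state) are initialized, then poll
 * until the ring has consumed the packet.
 */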
3619 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
3620 {
3621         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
3622         int i, r;
3623
3624         r = amdgpu_ring_alloc(ring, 7);
3625         if (r) {
3626                 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
3627                         ring->name, r);
3628                 return r;
3629         }
3630
3631         WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
3632         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
3633
3634         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3635         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
3636                                 PACKET3_DMA_DATA_DST_SEL(1) |
3637                                 PACKET3_DMA_DATA_SRC_SEL(2) |
3638                                 PACKET3_DMA_DATA_ENGINE(0)));
3639         amdgpu_ring_write(ring, 0);
3640         amdgpu_ring_write(ring, 0);
3641         amdgpu_ring_write(ring, 0);
3642         amdgpu_ring_write(ring, 0);
3643         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
3644                                 adev->gds.gds_size);
3645
3646         amdgpu_ring_commit(ring);
3647
3648         for (i = 0; i < adev->usec_timeout; i++) {
3649                 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
3650                         break;
3651                 udelay(1);
3652         }
3653
3654         if (i >= adev->usec_timeout)
3655                 r = -ETIMEDOUT;
3656
3657         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
3658
3659         return r;
3660 }
3661
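/* GPR EDC workaround (only when GFX RAS is enabled): build an IB that
 * dispatches the VGPR and SGPR init shaders so every register file
 * entry is written once, submit it on the first compute ring and wait
 * for its fence.
 */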
3662 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
3663 {
3664         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
3665         struct amdgpu_ib ib;
3666         struct dma_fence *f = NULL;
3667         int r, i, j, k;
3668         unsigned total_size, vgpr_offset, sgpr_offset;
3669         u64 gpu_addr;
3670
3671         /* only applied when GFX RAS is enabled */
3672         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
3673                 return 0;
3674
3675         /* bail if the compute ring is not ready */
3676         if (!ring->sched.ready)
3677                 return 0;
3678
3679         total_size =
3680                 ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3681         total_size +=
3682                 ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3683         total_size = ALIGN(total_size, 256);
3684         vgpr_offset = total_size;
3685         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
3686         sgpr_offset = total_size;
3687         total_size += sizeof(sgpr_init_compute_shader);
3688
3689         /* allocate an indirect buffer to put the commands in */
3690         memset(&ib, 0, sizeof(ib));
3691         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
3692         if (r) {
3693                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
3694                 return r;
3695         }
3696
3697         /* load the compute shaders */
3698         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
3699                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
3700
3701         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
3702                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
3703
3704         /* init the ib length to 0 */
3705         ib.length_dw = 0;
3706
3707         /* VGPR */
3708         /* write the register state for the compute dispatch */
3709         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
3710                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3711                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
3712                                                                 - PACKET3_SET_SH_REG_START;
3713                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
3714         }
3715         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3716         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
3717         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3718         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3719                                                         - PACKET3_SET_SH_REG_START;
3720         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3721         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3722
3723         /* write dispatch packet */
3724         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3725         ib.ptr[ib.length_dw++] = 128; /* x */
3726         ib.ptr[ib.length_dw++] = 1; /* y */
3727         ib.ptr[ib.length_dw++] = 1; /* z */
3728         ib.ptr[ib.length_dw++] =
3729                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3730
3731         /* write CS partial flush packet */
3732         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3733         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3734
3735         /* SGPR */
3736         /* write the register state for the compute dispatch */
3737         for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
3738                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3739                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
3740                                                                 - PACKET3_SET_SH_REG_START;
3741                 ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
3742         }
3743         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3744         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
3745         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3746         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3747                                                         - PACKET3_SET_SH_REG_START;
3748         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3749         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3750
3751         /* write dispatch packet */
3752         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3753         ib.ptr[ib.length_dw++] = 128; /* x */
3754         ib.ptr[ib.length_dw++] = 1; /* y */
3755         ib.ptr[ib.length_dw++] = 1; /* z */
3756         ib.ptr[ib.length_dw++] =
3757                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3758
3759         /* write CS partial flush packet */
3760         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3761         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3762
3763         /* schedule the ib on the ring */
3764         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
3765         if (r) {
3766                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
3767                 goto fail;
3768         }
3769
3770         /* wait for the GPU to finish processing the IB */
3771         r = dma_fence_wait(f, false);
3772         if (r) {
3773                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
3774                 goto fail;
3775         }
3776
3777         /* read back registers to clear the counters */
3778         mutex_lock(&adev->grbm_idx_mutex);
3779         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
3780                 for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
3781                         for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
3782                                 gfx_v9_0_select_se_sh(adev, j, 0x0, k);
3783                                 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3784                         }
3785                 }
3786         }
3787         WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
3788         mutex_unlock(&adev->grbm_idx_mutex);
3789
3790 fail:
3791         amdgpu_ib_free(adev, &ib, NULL);
3792         dma_fence_put(f);
3793
3794         return r;
3795 }
3796
3797 static int gfx_v9_0_early_init(void *handle)
3798 {
3799         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3800
3801         adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
3802         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
3803         gfx_v9_0_set_ring_funcs(adev);
3804         gfx_v9_0_set_irq_funcs(adev);
3805         gfx_v9_0_set_gds_init(adev);
3806         gfx_v9_0_set_rlc_funcs(adev);
3807
3808         return 0;
3809 }
3810
3811 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
3812                 struct amdgpu_iv_entry *entry);
3813
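     /*
      * RAS/ECC late init: run the GDS and GPR EDC workarounds, then enable
      * the GFX RAS feature, register the interrupt handler and the
      * debugfs/sysfs nodes, and enable the CP ECC error interrupt.  On the
      * resume path only the feature and the interrupt are re-enabled.
      */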
3814 static int gfx_v9_0_ecc_late_init(void *handle)
3815 {
3816         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3817         struct ras_common_if **ras_if = &adev->gfx.ras_if;
3818         struct ras_ih_if ih_info = {
3819                 .cb = gfx_v9_0_process_ras_data_cb,
3820         };
3821         struct ras_fs_if fs_info = {
3822                 .sysfs_name = "gfx_err_count",
3823                 .debugfs_name = "gfx_err_inject",
3824         };
3825         struct ras_common_if ras_block = {
3826                 .block = AMDGPU_RAS_BLOCK__GFX,
3827                 .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
3828                 .sub_block_index = 0,
3829                 .name = "gfx",
3830         };
3831         int r;
3832
3833         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
3834                 amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
3835                 return 0;
3836         }
3837
3838         r = gfx_v9_0_do_edc_gds_workarounds(adev);
3839         if (r)
3840                 return r;
3841
3842         /* requires IBs so do in late init after IB pool is initialized */
3843         r = gfx_v9_0_do_edc_gpr_workarounds(adev);
3844         if (r)
3845                 return r;
3846
3847         /* handle resume path. */
3848         if (*ras_if) {
3849                 /* resend ras TA enable cmd during resume.
3850                  * prepare to handle failure.
3851                  */
3852                 ih_info.head = **ras_if;
3853                 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
3854                 if (r) {
3855                         if (r == -EAGAIN) {
3856                                 /* request a gpu reset. will run again. */
3857                                 amdgpu_ras_request_reset_on_boot(adev,
3858                                                 AMDGPU_RAS_BLOCK__GFX);
3859                                 return 0;
3860                         }
3861                         /* failed to enable ras, clean up everything. */
3862                         goto irq;
3863                 }
3864                 /* enabled successfully, continue. */
3865                 goto resume;
3866         }
3867
3868         *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
3869         if (!*ras_if)
3870                 return -ENOMEM;
3871
3872         **ras_if = ras_block;
3873
3874         r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
3875         if (r) {
3876                 if (r == -EAGAIN) {
3877                         amdgpu_ras_request_reset_on_boot(adev,
3878                                         AMDGPU_RAS_BLOCK__GFX);
3879                         r = 0;
3880                 }
3881                 goto feature;
3882         }
3883
3884         ih_info.head = **ras_if;
3885         fs_info.head = **ras_if;
3886
3887         r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
3888         if (r)
3889                 goto interrupt;
3890
3891         amdgpu_ras_debugfs_create(adev, &fs_info);
3892
3893         r = amdgpu_ras_sysfs_create(adev, &fs_info);
3894         if (r)
3895                 goto sysfs;
3896 resume:
3897         r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
3898         if (r)
3899                 goto irq;
3900
3901         return 0;
3902 irq:
3903         amdgpu_ras_sysfs_remove(adev, *ras_if);
3904 sysfs:
3905         amdgpu_ras_debugfs_remove(adev, *ras_if);
3906         amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
3907 interrupt:
3908         amdgpu_ras_feature_enable(adev, *ras_if, 0);
3909 feature:
3910         kfree(*ras_if);
3911         *ras_if = NULL;
3912         return r;
3913 }
3914
3915 static int gfx_v9_0_late_init(void *handle)
3916 {
3917         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3918         int r;
3919
3920         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
3921         if (r)
3922                 return r;
3923
3924         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
3925         if (r)
3926                 return r;
3927
3928         r = gfx_v9_0_ecc_late_init(handle);
3929         if (r)
3930                 return r;
3931
3932         return 0;
3933 }
3934
3935 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
3936 {
3937         uint32_t rlc_setting;
3938
3939         /* if RLC is not enabled, do nothing */
3940         rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
3941         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
3942                 return false;
3943
3944         return true;
3945 }
3946
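     /*
      * Request RLC safe mode by writing CMD plus a MESSAGE of 1 to
      * RLC_SAFE_MODE, then poll until the RLC acknowledges by clearing
      * the CMD field (or the timeout expires).
      */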
3947 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
3948 {
3949         uint32_t data;
3950         unsigned i;
3951
3952         data = RLC_SAFE_MODE__CMD_MASK;
3953         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
3954         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3955
3956         /* wait for RLC_SAFE_MODE */
3957         for (i = 0; i < adev->usec_timeout; i++) {
3958                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
3959                         break;
3960                 udelay(1);
3961         }
3962 }
3963
3964 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
3965 {
3966         uint32_t data;
3967
3968         data = RLC_SAFE_MODE__CMD_MASK;
3969         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3970 }
3971
3972 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
3973                                                 bool enable)
3974 {
3975         amdgpu_gfx_rlc_enter_safe_mode(adev);
3976
3977         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
3978                 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
3979                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
3980                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
3981         } else {
3982                 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
3983                 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
3984         }
3985
3986         amdgpu_gfx_rlc_exit_safe_mode(adev);
3987 }
3988
3989 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
3990                                                 bool enable)
3991 {
3992         /* TODO: double check if we need to perform under safe mode */
3993         /* gfx_v9_0_enter_rlc_safe_mode(adev); */
3994
3995         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
3996                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
3997         else
3998                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
3999
4000         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4001                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4002         else
4003                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4004
4005         /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4006 }
4007
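     /*
      * Medium grain clock gating: under RLC safe mode, update the MGCG/MGLS
      * override bits in RLC_CGTT_MGCG_OVERRIDE and enable RLC/CP memory
      * light sleep when enabling, or set the overrides back and clear the
      * light sleep enables when disabling.
      */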
4008 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4009                                                       bool enable)
4010 {
4011         uint32_t data, def;
4012
4013         amdgpu_gfx_rlc_enter_safe_mode(adev);
4014
4015         /* It is disabled by HW by default */
4016         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4017                 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4018                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4019
4020                 if (adev->asic_type != CHIP_VEGA12)
4021                         data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4022
4023                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4024                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4025                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4026
4027                 /* only for Vega10 & Raven1 */
4028                 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4029
4030                 if (def != data)
4031                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4032
4033                 /* MGLS is a global flag to control all MGLS in GFX */
4034                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4035                         /* 2 - RLC memory Light sleep */
4036                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4037                                 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4038                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4039                                 if (def != data)
4040                                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4041                         }
4042                         /* 3 - CP memory Light sleep */
4043                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4044                                 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4045                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4046                                 if (def != data)
4047                                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4048                         }
4049                 }
4050         } else {
4051                 /* 1 - MGCG_OVERRIDE */
4052                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4053
4054                 if (adev->asic_type != CHIP_VEGA12)
4055                         data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4056
4057                 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4058                          RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4059                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4060                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4061
4062                 if (def != data)
4063                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4064
4065                 /* 2 - disable MGLS in RLC */
4066                 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4067                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4068                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4069                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4070                 }
4071
4072                 /* 3 - disable MGLS in CP */
4073                 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4074                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4075                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4076                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4077                 }
4078         }
4079
4080         amdgpu_gfx_rlc_exit_safe_mode(adev);
4081 }
4082
4083 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4084                                            bool enable)
4085 {
4086         uint32_t data, def;
4087
4088         amdgpu_gfx_rlc_enter_safe_mode(adev);
4089
4090         /* Enable 3D CGCG/CGLS */
4091         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4092                 /* write cmd to clear cgcg/cgls ov */
4093                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4094                 /* unset CGCG override */
4095                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4096                 /* update CGCG and CGLS override bits */
4097                 if (def != data)
4098                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4099
4100                 /* enable 3Dcgcg FSM(0x0000363f) */
4101                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4102
4103                 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4104                         RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4105                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4106                         data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4107                                 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4108                 if (def != data)
4109                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4110
4111                 /* set IDLE_POLL_COUNT(0x00900100) */
4112                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4113                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4114                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4115                 if (def != data)
4116                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4117         } else {
4118                 /* Disable CGCG/CGLS */
4119                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4120                 /* disable cgcg, cgls should be disabled */
4121                 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4122                           RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4123                 /* disable cgcg and cgls in FSM */
4124                 if (def != data)
4125                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4126         }
4127
4128         amdgpu_gfx_rlc_exit_safe_mode(adev);
4129 }
4130
4131 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4132                                                       bool enable)
4133 {
4134         uint32_t def, data;
4135
4136         amdgpu_gfx_rlc_enter_safe_mode(adev);
4137
4138         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4139                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4140                 /* unset CGCG override */
4141                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4142                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4143                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4144                 else
4145                         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4146                 /* update CGCG and CGLS override bits */
4147                 if (def != data)
4148                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4149
4150                 /* enable cgcg FSM(0x0000363F) */
4151                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4152
4153                 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4154                         RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4155                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4156                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4157                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4158                 if (def != data)
4159                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4160
4161                 /* set IDLE_POLL_COUNT(0x00900100) */
4162                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4163                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4164                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4165                 if (def != data)
4166                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4167         } else {
4168                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4169                 /* reset CGCG/CGLS bits */
4170                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4171                 /* disable cgcg and cgls in FSM */
4172                 if (def != data)
4173                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4174         }
4175
4176         amdgpu_gfx_rlc_exit_safe_mode(adev);
4177 }
4178
4179 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4180                                             bool enable)
4181 {
4182         if (enable) {
4183                 /* CGCG/CGLS should be enabled after MGCG/MGLS
4184                  * ===  MGCG + MGLS ===
4185                  */
4186                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4187                 /* ===  CGCG /CGLS for GFX 3D Only === */
4188                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4189                 /* ===  CGCG + CGLS === */
4190                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4191         } else {
4192                 /* CGCG/CGLS should be disabled before MGCG/MGLS
4193                  * ===  CGCG + CGLS ===
4194                  */
4195                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4196                 /* ===  CGCG /CGLS for GFX 3D Only === */
4197                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4198                 /* ===  MGCG + MGLS === */
4199                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4200         }
4201         return 0;
4202 }
4203
4204 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4205         .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4206         .set_safe_mode = gfx_v9_0_set_safe_mode,
4207         .unset_safe_mode = gfx_v9_0_unset_safe_mode,
4208         .init = gfx_v9_0_rlc_init,
4209         .get_csb_size = gfx_v9_0_get_csb_size,
4210         .get_csb_buffer = gfx_v9_0_get_csb_buffer,
4211         .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4212         .resume = gfx_v9_0_rlc_resume,
4213         .stop = gfx_v9_0_rlc_stop,
4214         .reset = gfx_v9_0_rlc_reset,
4215         .start = gfx_v9_0_rlc_start
4216 };
4217
4218 static int gfx_v9_0_set_powergating_state(void *handle,
4219                                           enum amd_powergating_state state)
4220 {
4221         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4222         bool enable = (state == AMD_PG_STATE_GATE);
4223
4224         switch (adev->asic_type) {
4225         case CHIP_RAVEN:
4226                 if (!enable) {
4227                         amdgpu_gfx_off_ctrl(adev, false);
4228                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4229                 }
4230                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4231                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4232                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4233                 } else {
4234                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4235                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4236                 }
4237
4238                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4239                         gfx_v9_0_enable_cp_power_gating(adev, true);
4240                 else
4241                         gfx_v9_0_enable_cp_power_gating(adev, false);
4242
4243                 /* update gfx cgpg state */
4244                 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4245
4246                 /* update mgcg state */
4247                 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4248
4249                 if (enable)
4250                         amdgpu_gfx_off_ctrl(adev, true);
4251                 break;
4252         case CHIP_VEGA12:
4253                 if (!enable) {
4254                         amdgpu_gfx_off_ctrl(adev, false);
4255                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4256                 } else {
4257                         amdgpu_gfx_off_ctrl(adev, true);
4258                 }
4259                 break;
4260         default:
4261                 break;
4262         }
4263
4264         return 0;
4265 }
4266
4267 static int gfx_v9_0_set_clockgating_state(void *handle,
4268                                           enum amd_clockgating_state state)
4269 {
4270         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4271
4272         if (amdgpu_sriov_vf(adev))
4273                 return 0;
4274
4275         switch (adev->asic_type) {
4276         case CHIP_VEGA10:
4277         case CHIP_VEGA12:
4278         case CHIP_VEGA20:
4279         case CHIP_RAVEN:
4280                 gfx_v9_0_update_gfx_clock_gating(adev,
4281                                                  state == AMD_CG_STATE_GATE);
4282                 break;
4283         default:
4284                 break;
4285         }
4286         return 0;
4287 }
4288
4289 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4290 {
4291         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4292         int data;
4293
4294         if (amdgpu_sriov_vf(adev))
4295                 *flags = 0;
4296
4297         /* AMD_CG_SUPPORT_GFX_MGCG */
4298         data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4299         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4300                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
4301
4302         /* AMD_CG_SUPPORT_GFX_CGCG */
4303         data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4304         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4305                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
4306
4307         /* AMD_CG_SUPPORT_GFX_CGLS */
4308         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4309                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
4310
4311         /* AMD_CG_SUPPORT_GFX_RLC_LS */
4312         data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4313         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4314                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4315
4316         /* AMD_CG_SUPPORT_GFX_CP_LS */
4317         data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4318         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4319                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4320
4321         /* AMD_CG_SUPPORT_GFX_3D_CGCG */
4322         data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4323         if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4324                 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4325
4326         /* AMD_CG_SUPPORT_GFX_3D_CGLS */
4327         if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4328                 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4329 }
4330
4331 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4332 {
4333         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4334 }
4335
4336 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4337 {
4338         struct amdgpu_device *adev = ring->adev;
4339         u64 wptr;
4340
4341         /* XXX check if swapping is necessary on BE */
4342         if (ring->use_doorbell) {
4343                 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4344         } else {
4345                 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4346                 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4347         }
4348
4349         return wptr;
4350 }
4351
4352 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4353 {
4354         struct amdgpu_device *adev = ring->adev;
4355
4356         if (ring->use_doorbell) {
4357                 /* XXX check if swapping is necessary on BE */
4358                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4359                 WDOORBELL64(ring->doorbell_index, ring->wptr);
4360         } else {
4361                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4362                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4363         }
4364 }
4365
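     /*
      * Emit an HDP flush for this ring: pick the ref/mask bit for the CP
      * engine from the NBIO callbacks and wait on the NBIO HDP flush
      * request/done register pair.
      */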
4366 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4367 {
4368         struct amdgpu_device *adev = ring->adev;
4369         u32 ref_and_mask, reg_mem_engine;
4370         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
4371
4372         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4373                 switch (ring->me) {
4374                 case 1:
4375                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4376                         break;
4377                 case 2:
4378                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4379                         break;
4380                 default:
4381                         return;
4382                 }
4383                 reg_mem_engine = 0;
4384         } else {
4385                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4386                 reg_mem_engine = 1; /* pfp */
4387         }
4388
4389         gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4390                               adev->nbio_funcs->get_hdp_flush_req_offset(adev),
4391                               adev->nbio_funcs->get_hdp_flush_done_offset(adev),
4392                               ref_and_mask, ref_and_mask, 0x20);
4393 }
4394
4395 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4396                                         struct amdgpu_job *job,
4397                                         struct amdgpu_ib *ib,
4398                                         uint32_t flags)
4399 {
4400         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4401         u32 header, control = 0;
4402
4403         if (ib->flags & AMDGPU_IB_FLAG_CE)
4404                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4405         else
4406                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4407
4408         control |= ib->length_dw | (vmid << 24);
4409
4410         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4411                 control |= INDIRECT_BUFFER_PRE_ENB(1);
4412
4413                 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4414                         gfx_v9_0_ring_emit_de_meta(ring);
4415         }
4416
4417         amdgpu_ring_write(ring, header);
4418         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4419         amdgpu_ring_write(ring,
4420 #ifdef __BIG_ENDIAN
4421                 (2 << 0) |
4422 #endif
4423                 lower_32_bits(ib->gpu_addr));
4424         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4425         amdgpu_ring_write(ring, control);
4426 }
4427
4428 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4429                                           struct amdgpu_job *job,
4430                                           struct amdgpu_ib *ib,
4431                                           uint32_t flags)
4432 {
4433         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4434         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4435
4436         /* Currently, there is a high possibility to get wave ID mismatch
4437          * between ME and GDS, leading to a hw deadlock, because ME generates
4438          * different wave IDs than the GDS expects. This situation happens
4439          * randomly when at least 5 compute pipes use GDS ordered append.
4440          * The wave IDs generated by ME are also wrong after suspend/resume.
4441          * Those are probably bugs somewhere else in the kernel driver.
4442          *
4443          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4444          * GDS to 0 for this ring (me/pipe).
4445          */
4446         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4447                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4448                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4449                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4450         }
4451
4452         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4453         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4454         amdgpu_ring_write(ring,
4455 #ifdef __BIG_ENDIAN
4456                                 (2 << 0) |
4457 #endif
4458                                 lower_32_bits(ib->gpu_addr));
4459         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4460         amdgpu_ring_write(ring, control);
4461 }
4462
4463 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4464                                      u64 seq, unsigned flags)
4465 {
4466         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4467         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4468         bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
4469
4470         /* RELEASE_MEM - flush caches, send int */
4471         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
4472         amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
4473                                                EOP_TC_NC_ACTION_EN) :
4474                                               (EOP_TCL1_ACTION_EN |
4475                                                EOP_TC_ACTION_EN |
4476                                                EOP_TC_WB_ACTION_EN |
4477                                                EOP_TC_MD_ACTION_EN)) |
4478                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4479                                  EVENT_INDEX(5)));
4480         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4481
4482         /*
4483          * the address should be Qword aligned for a 64bit write, Dword
4484          * aligned if only the low 32bit data is sent (data high is discarded)
4485          */
4486         if (write64bit)
4487                 BUG_ON(addr & 0x7);
4488         else
4489                 BUG_ON(addr & 0x3);
4490         amdgpu_ring_write(ring, lower_32_bits(addr));
4491         amdgpu_ring_write(ring, upper_32_bits(addr));
4492         amdgpu_ring_write(ring, lower_32_bits(seq));
4493         amdgpu_ring_write(ring, upper_32_bits(seq));
4494         amdgpu_ring_write(ring, 0);
4495 }
4496
4497 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4498 {
4499         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4500         uint32_t seq = ring->fence_drv.sync_seq;
4501         uint64_t addr = ring->fence_drv.gpu_addr;
4502
4503         gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
4504                               lower_32_bits(addr), upper_32_bits(addr),
4505                               seq, 0xffffffff, 4);
4506 }
4507
4508 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4509                                         unsigned vmid, uint64_t pd_addr)
4510 {
4511         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
4512
4513         /* compute doesn't have PFP */
4514         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
4515                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4516                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4517                 amdgpu_ring_write(ring, 0x0);
4518         }
4519 }
4520
4521 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4522 {
4523         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
4524 }
4525
4526 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4527 {
4528         u64 wptr;
4529
4530         /* XXX check if swapping is necessary on BE */
4531         if (ring->use_doorbell)
4532                 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
4533         else
4534                 BUG();
4535         return wptr;
4536 }
4537
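     /*
      * Adjust SPI_WCL_PIPE_PERCENT for this ring's pipe: program the full
      * VALUE mask when acquiring a reservation, a minimal value (0x1) when
      * releasing it.
      */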
4538 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
4539                                            bool acquire)
4540 {
4541         struct amdgpu_device *adev = ring->adev;
4542         int pipe_num, tmp, reg;
4543         int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
4544
4545         pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
4546
4547         /* first me only has 2 entries, GFX and HP3D */
4548         if (ring->me > 0)
4549                 pipe_num -= 2;
4550
4551         reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
4552         tmp = RREG32(reg);
4553         tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
4554         WREG32(reg, tmp);
4555 }
4556
4557 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
4558                                             struct amdgpu_ring *ring,
4559                                             bool acquire)
4560 {
4561         int i, pipe;
4562         bool reserve;
4563         struct amdgpu_ring *iring;
4564
4565         mutex_lock(&adev->gfx.pipe_reserve_mutex);
4566         pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
4567         if (acquire)
4568                 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4569         else
4570                 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4571
4572         if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
4573                 /* Clear all reservations - everyone reacquires all resources */
4574                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
4575                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
4576                                                        true);
4577
4578                 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
4579                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
4580                                                        true);
4581         } else {
4582                 /* Lower all pipes without a current reservation */
4583                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
4584                         iring = &adev->gfx.gfx_ring[i];
4585                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4586                                                            iring->me,
4587                                                            iring->pipe,
4588                                                            0);
4589                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4590                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4591                 }
4592
4593                 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
4594                         iring = &adev->gfx.compute_ring[i];
4595                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4596                                                            iring->me,
4597                                                            iring->pipe,
4598                                                            0);
4599                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4600                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4601                 }
4602         }
4603
4604         mutex_unlock(&adev->gfx.pipe_reserve_mutex);
4605 }
4606
4607 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
4608                                       struct amdgpu_ring *ring,
4609                                       bool acquire)
4610 {
4611         uint32_t pipe_priority = acquire ? 0x2 : 0x0;
4612         uint32_t queue_priority = acquire ? 0xf : 0x0;
4613
4614         mutex_lock(&adev->srbm_mutex);
4615         soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4616
4617         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
4618         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
4619
4620         soc15_grbm_select(adev, 0, 0, 0, 0);
4621         mutex_unlock(&adev->srbm_mutex);
4622 }
4623
4624 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
4625                                                enum drm_sched_priority priority)
4626 {
4627         struct amdgpu_device *adev = ring->adev;
4628         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
4629
4630         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
4631                 return;
4632
4633         gfx_v9_0_hqd_set_priority(adev, ring, acquire);
4634         gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
4635 }
4636
4637 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4638 {
4639         struct amdgpu_device *adev = ring->adev;
4640
4641         /* XXX check if swapping is necessary on BE */
4642         if (ring->use_doorbell) {
4643                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4644                 WDOORBELL64(ring->doorbell_index, ring->wptr);
4645         } else {
4646                 BUG(); /* only DOORBELL method supported on gfx9 now */
4647         }
4648 }
4649
4650 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
4651                                          u64 seq, unsigned int flags)
4652 {
4653         struct amdgpu_device *adev = ring->adev;
4654
4655         /* we only allocate 32bit for each seq wb address */
4656         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
4657
4658         /* write fence seq to the "addr" */
4659         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4660         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4661                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
4662         amdgpu_ring_write(ring, lower_32_bits(addr));
4663         amdgpu_ring_write(ring, upper_32_bits(addr));
4664         amdgpu_ring_write(ring, lower_32_bits(seq));
4665
4666         if (flags & AMDGPU_FENCE_FLAG_INT) {
4667                 /* set register to trigger INT */
4668                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4669                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4670                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
4671                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
4672                 amdgpu_ring_write(ring, 0);
4673                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
4674         }
4675 }
4676
4677 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
4678 {
4679         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4680         amdgpu_ring_write(ring, 0);
4681 }
4682
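     /*
      * Write a zero-initialized v9_ce_ib_state into the ce_payload slot of
      * the CSA using a WRITE_DATA packet.
      */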
4683 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
4684 {
4685         struct v9_ce_ib_state ce_payload = {0};
4686         uint64_t csa_addr;
4687         int cnt;
4688
4689         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
4690         csa_addr = amdgpu_csa_vaddr(ring->adev);
4691
4692         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4693         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
4694                                  WRITE_DATA_DST_SEL(8) |
4695                                  WR_CONFIRM) |
4696                                  WRITE_DATA_CACHE_POLICY(0));
4697         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4698         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4699         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
4700 }
4701
4702 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
4703 {
4704         struct v9_de_ib_state de_payload = {0};
4705         uint64_t csa_addr, gds_addr;
4706         int cnt;
4707
4708         csa_addr = amdgpu_csa_vaddr(ring->adev);
4709         gds_addr = csa_addr + 4096;
4710         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
4711         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
4712
4713         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
4714         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4715         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
4716                                  WRITE_DATA_DST_SEL(8) |
4717                                  WR_CONFIRM) |
4718                                  WRITE_DATA_CACHE_POLICY(0));
4719         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4720         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4721         amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
4722 }
4723
4724 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
4725 {
4726         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
4727         amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* 0: frame_start, 1: frame_end */
4728 }
4729
4730 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
4731 {
4732         uint32_t dw2 = 0;
4733
4734         if (amdgpu_sriov_vf(ring->adev))
4735                 gfx_v9_0_ring_emit_ce_meta(ring);
4736
4737         gfx_v9_0_ring_emit_tmz(ring, true);
4738
4739         dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
4740         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
4741                 /* set load_global_config & load_global_uconfig */
4742                 dw2 |= 0x8001;
4743                 /* set load_cs_sh_regs */
4744                 dw2 |= 0x01000000;
4745                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
4746                 dw2 |= 0x10002;
4747
4748                 /* set load_ce_ram if preamble presented */
4749                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
4750                         dw2 |= 0x10000000;
4751         } else {
4752                 /* still load_ce_ram if this is the first time a preamble
4753                  * is presented, even though no context switch happens.
4754                  */
4755                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
4756                         dw2 |= 0x10000000;
4757         }
4758
4759         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4760         amdgpu_ring_write(ring, dw2);
4761         amdgpu_ring_write(ring, 0);
4762 }
4763
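     /*
      * Start a conditional execution block: emit a COND_EXEC packet whose
      * DW count is a 0x55aa55aa placeholder and return its ring offset so
      * gfx_v9_0_ring_emit_patch_cond_exec() can patch in the real count.
      */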
4764 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
4765 {
4766         unsigned ret;
4767         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
4768         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
4769         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
4770         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
4771         ret = ring->wptr & ring->buf_mask;
4772         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
4773         return ret;
4774 }
4775
4776 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
4777 {
4778         unsigned cur;
4779         BUG_ON(offset > ring->buf_mask);
4780         BUG_ON(ring->ring[offset] != 0x55aa55aa);
4781
4782         cur = (ring->wptr & ring->buf_mask) - 1;
4783         if (likely(cur > offset))
4784                 ring->ring[offset] = cur - offset;
4785         else
4786                 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
4787 }
4788
4789 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
4790 {
4791         struct amdgpu_device *adev = ring->adev;
4792
4793         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4794         amdgpu_ring_write(ring, 0 |     /* src: register */
4795                                 (5 << 8) |      /* dst: memory */
4796                                 (1 << 20));     /* write confirm */
4797         amdgpu_ring_write(ring, reg);
4798         amdgpu_ring_write(ring, 0);
4799         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4800                                 adev->virt.reg_val_offs * 4));
4801         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4802                                 adev->virt.reg_val_offs * 4));
4803 }
4804
4805 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
4806                                     uint32_t val)
4807 {
4808         uint32_t cmd = 0;
4809
4810         switch (ring->funcs->type) {
4811         case AMDGPU_RING_TYPE_GFX:
4812                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
4813                 break;
4814         case AMDGPU_RING_TYPE_KIQ:
4815                 cmd = (1 << 16); /* no inc addr */
4816                 break;
4817         default:
4818                 cmd = WR_CONFIRM;
4819                 break;
4820         }
4821         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4822         amdgpu_ring_write(ring, cmd);
4823         amdgpu_ring_write(ring, reg);
4824         amdgpu_ring_write(ring, 0);
4825         amdgpu_ring_write(ring, val);
4826 }
4827
4828 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
4829                                         uint32_t val, uint32_t mask)
4830 {
4831         gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
4832 }
4833
4834 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
4835                                                   uint32_t reg0, uint32_t reg1,
4836                                                   uint32_t ref, uint32_t mask)
4837 {
4838         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4839         struct amdgpu_device *adev = ring->adev;
4840         bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
4841                 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
4842
4843         if (fw_version_ok)
4844                 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
4845                                       ref, mask, 0x20);
4846         else
4847                 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
4848                                                            ref, mask);
4849 }
4850
4851 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
4852 {
4853         struct amdgpu_device *adev = ring->adev;
4854         uint32_t value = 0;
4855
4856         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
4857         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
4858         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
4859         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
4860         WREG32(mmSQ_CMD, value);
4861 }
4862
4863 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4864                                                  enum amdgpu_interrupt_state state)
4865 {
4866         switch (state) {
4867         case AMDGPU_IRQ_STATE_DISABLE:
4868         case AMDGPU_IRQ_STATE_ENABLE:
4869                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4870                                TIME_STAMP_INT_ENABLE,
4871                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4872                 break;
4873         default:
4874                 break;
4875         }
4876 }
4877
4878 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4879                                                      int me, int pipe,
4880                                                      enum amdgpu_interrupt_state state)
4881 {
4882         u32 mec_int_cntl, mec_int_cntl_reg;
4883
4884         /*
4885          * amdgpu controls only the first MEC. That's why this function only
4886          * handles the setting of interrupts for this specific MEC. All other
4887          * pipes' interrupts are set by amdkfd.
4888          */
4889
4890         if (me == 1) {
4891                 switch (pipe) {
4892                 case 0:
4893                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
4894                         break;
4895                 case 1:
4896                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
4897                         break;
4898                 case 2:
4899                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
4900                         break;
4901                 case 3:
4902                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
4903                         break;
4904                 default:
4905                         DRM_DEBUG("invalid pipe %d\n", pipe);
4906                         return;
4907                 }
4908         } else {
4909                 DRM_DEBUG("invalid me %d\n", me);
4910                 return;
4911         }
4912
4913         switch (state) {
4914         case AMDGPU_IRQ_STATE_DISABLE:
4915                 mec_int_cntl = RREG32(mec_int_cntl_reg);
4916                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4917                                              TIME_STAMP_INT_ENABLE, 0);
4918                 WREG32(mec_int_cntl_reg, mec_int_cntl);
4919                 break;
4920         case AMDGPU_IRQ_STATE_ENABLE:
4921                 mec_int_cntl = RREG32(mec_int_cntl_reg);
4922                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4923                                              TIME_STAMP_INT_ENABLE, 1);
4924                 WREG32(mec_int_cntl_reg, mec_int_cntl);
4925                 break;
4926         default:
4927                 break;
4928         }
4929 }
4930
4931 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4932                                              struct amdgpu_irq_src *source,
4933                                              unsigned type,
4934                                              enum amdgpu_interrupt_state state)
4935 {
4936         switch (state) {
4937         case AMDGPU_IRQ_STATE_DISABLE:
4938         case AMDGPU_IRQ_STATE_ENABLE:
4939                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4940                                PRIV_REG_INT_ENABLE,
4941                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4942                 break;
4943         default:
4944                 break;
4945         }
4946
4947         return 0;
4948 }
4949
4950 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4951                                               struct amdgpu_irq_src *source,
4952                                               unsigned type,
4953                                               enum amdgpu_interrupt_state state)
4954 {
4955         switch (state) {
4956         case AMDGPU_IRQ_STATE_DISABLE:
4957         case AMDGPU_IRQ_STATE_ENABLE:
4958                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4959                                PRIV_INSTR_INT_ENABLE,
4960                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
                     break;
4961         default:
4962                 break;
4963         }
4964
4965         return 0;
4966 }
4967
4968 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)                         \
4969         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
4970                         CP_ECC_ERROR_INT_ENABLE, 1)
4971
4972 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)                        \
4973         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
4974                         CP_ECC_ERROR_INT_ENABLE, 0)
4975
4976 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
4977                                               struct amdgpu_irq_src *source,
4978                                               unsigned type,
4979                                               enum amdgpu_interrupt_state state)
4980 {
4981         switch (state) {
4982         case AMDGPU_IRQ_STATE_DISABLE:
4983                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4984                                 CP_ECC_ERROR_INT_ENABLE, 0);
4985                 DISABLE_ECC_ON_ME_PIPE(1, 0);
4986                 DISABLE_ECC_ON_ME_PIPE(1, 1);
4987                 DISABLE_ECC_ON_ME_PIPE(1, 2);
4988                 DISABLE_ECC_ON_ME_PIPE(1, 3);
4989                 break;
4990
4991         case AMDGPU_IRQ_STATE_ENABLE:
4992                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4993                                 CP_ECC_ERROR_INT_ENABLE, 1);
4994                 ENABLE_ECC_ON_ME_PIPE(1, 0);
4995                 ENABLE_ECC_ON_ME_PIPE(1, 1);
4996                 ENABLE_ECC_ON_ME_PIPE(1, 2);
4997                 ENABLE_ECC_ON_ME_PIPE(1, 3);
4998                 break;
4999         default:
5000                 break;
5001         }
5002
5003         return 0;
5004 }
5005
5006
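     /* Route an EOP interrupt state change to the gfx ring or to the matching MEC pipe. */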
5007 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5008                                             struct amdgpu_irq_src *src,
5009                                             unsigned type,
5010                                             enum amdgpu_interrupt_state state)
5011 {
5012         switch (type) {
5013         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5014                 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5015                 break;
5016         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5017                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5018                 break;
5019         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5020                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5021                 break;
5022         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5023                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5024                 break;
5025         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5026                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5027                 break;
5028         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5029                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5030                 break;
5031         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5032                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5033                 break;
5034         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5035                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5036                 break;
5037         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5038                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5039                 break;
5040         default:
5041                 break;
5042         }
5043         return 0;
5044 }
5045
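     /* Decode the IV ring_id into me/pipe/queue and run fence processing on the
      * ring that raised the EOP interrupt.
      */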
5046 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5047                             struct amdgpu_irq_src *source,
5048                             struct amdgpu_iv_entry *entry)
5049 {
5050         int i;
5051         u8 me_id, pipe_id, queue_id;
5052         struct amdgpu_ring *ring;
5053
5054         DRM_DEBUG("IH: CP EOP\n");
5055         me_id = (entry->ring_id & 0x0c) >> 2;
5056         pipe_id = (entry->ring_id & 0x03) >> 0;
5057         queue_id = (entry->ring_id & 0x70) >> 4;
5058
5059         switch (me_id) {
5060         case 0:
5061                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5062                 break;
5063         case 1:
5064         case 2:
5065                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5066                         ring = &adev->gfx.compute_ring[i];
5067                         /* Per-queue interrupt is supported for MEC starting from VI.
5068                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
5069                          */
5070                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5071                                 amdgpu_fence_process(ring);
5072                 }
5073                 break;
5074         }
5075         return 0;
5076 }
5077
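     /* On a CP fault, notify the scheduler of the ring that submitted the offending work. */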
5078 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5079                            struct amdgpu_iv_entry *entry)
5080 {
5081         u8 me_id, pipe_id, queue_id;
5082         struct amdgpu_ring *ring;
5083         int i;
5084
5085         me_id = (entry->ring_id & 0x0c) >> 2;
5086         pipe_id = (entry->ring_id & 0x03) >> 0;
5087         queue_id = (entry->ring_id & 0x70) >> 4;
5088
5089         switch (me_id) {
5090         case 0:
5091                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5092                 break;
5093         case 1:
5094         case 2:
5095                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5096                         ring = &adev->gfx.compute_ring[i];
5097                         if (ring->me == me_id && ring->pipe == pipe_id &&
5098                             ring->queue == queue_id)
5099                                 drm_sched_fault(&ring->sched);
5100                 }
5101                 break;
5102         }
5103 }
5104
5105 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5106                                  struct amdgpu_irq_src *source,
5107                                  struct amdgpu_iv_entry *entry)
5108 {
5109         DRM_ERROR("Illegal register access in command stream\n");
5110         gfx_v9_0_fault(adev, entry);
5111         return 0;
5112 }
5113
5114 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5115                                   struct amdgpu_irq_src *source,
5116                                   struct amdgpu_iv_entry *entry)
5117 {
5118         DRM_ERROR("Illegal instruction in command stream\n");
5119         gfx_v9_0_fault(adev, entry);
5120         return 0;
5121 }
5122
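     /* RAS callback: flag the SRAM ECC error to KFD and schedule a GPU reset. */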
5123 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
5124                 struct amdgpu_iv_entry *entry)
5125 {
5126         /* TODO: a UE (uncorrectable error) will trigger an interrupt. */
5127         kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
5128         amdgpu_ras_reset_gpu(adev, 0);
5129         return AMDGPU_RAS_UE;
5130 }
5131
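     /* Forward a CP ECC error interrupt to the RAS interrupt dispatcher. */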
5132 static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
5133                                   struct amdgpu_irq_src *source,
5134                                   struct amdgpu_iv_entry *entry)
5135 {
5136         struct ras_common_if *ras_if = adev->gfx.ras_if;
5137         struct ras_dispatch_if ih_data = {
5138                 .entry = entry,
5139         };
5140
5141         if (!ras_if)
5142                 return 0;
5143
5144         ih_data.head = *ras_if;
5145
5146         DRM_ERROR("CP ECC ERROR IRQ\n");
5147         amdgpu_ras_interrupt_dispatch(adev, &ih_data);
5148         return 0;
5149 }
5150
5151 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
5152         .name = "gfx_v9_0",
5153         .early_init = gfx_v9_0_early_init,
5154         .late_init = gfx_v9_0_late_init,
5155         .sw_init = gfx_v9_0_sw_init,
5156         .sw_fini = gfx_v9_0_sw_fini,
5157         .hw_init = gfx_v9_0_hw_init,
5158         .hw_fini = gfx_v9_0_hw_fini,
5159         .suspend = gfx_v9_0_suspend,
5160         .resume = gfx_v9_0_resume,
5161         .is_idle = gfx_v9_0_is_idle,
5162         .wait_for_idle = gfx_v9_0_wait_for_idle,
5163         .soft_reset = gfx_v9_0_soft_reset,
5164         .set_clockgating_state = gfx_v9_0_set_clockgating_state,
5165         .set_powergating_state = gfx_v9_0_set_powergating_state,
5166         .get_clockgating_state = gfx_v9_0_get_clockgating_state,
5167 };
5168
5169 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
5170         .type = AMDGPU_RING_TYPE_GFX,
5171         .align_mask = 0xff,
5172         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5173         .support_64bit_ptrs = true,
5174         .vmhub = AMDGPU_GFXHUB,
5175         .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
5176         .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
5177         .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
5178         .emit_frame_size = /* 242 dwords maximum in total if 16 IBs */
5179                 5 +  /* COND_EXEC */
5180                 7 +  /* PIPELINE_SYNC */
5181                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5182                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5183                 2 + /* VM_FLUSH */
5184                 8 +  /* FENCE for VM_FLUSH */
5185                 20 + /* GDS switch */
5186                 4 + /* double SWITCH_BUFFER,
5187                        the first COND_EXEC jumps to the place just
5188                        prior to this double SWITCH_BUFFER */
5189                 5 + /* COND_EXEC */
5190                 7 + /* HDP_flush */
5191                 4 + /* VGT_flush */
5192                 14 + /* CE_META */
5193                 31 + /* DE_META */
5194                 3 + /* CNTX_CTRL */
5195                 5 + /* HDP_INVL */
5196                 8 + 8 + /* FENCE x2 */
5197                 2, /* SWITCH_BUFFER */
5198         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
5199         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
5200         .emit_fence = gfx_v9_0_ring_emit_fence,
5201         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
5202         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
5203         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
5204         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
5205         .test_ring = gfx_v9_0_ring_test_ring,
5206         .test_ib = gfx_v9_0_ring_test_ib,
5207         .insert_nop = amdgpu_ring_insert_nop,
5208         .pad_ib = amdgpu_ring_generic_pad_ib,
5209         .emit_switch_buffer = gfx_v9_ring_emit_sb,
5210         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
5211         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
5212         .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
5213         .emit_tmz = gfx_v9_0_ring_emit_tmz,
5214         .emit_wreg = gfx_v9_0_ring_emit_wreg,
5215         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5216         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5217         .soft_recovery = gfx_v9_0_ring_soft_recovery,
5218 };
5219
5220 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
5221         .type = AMDGPU_RING_TYPE_COMPUTE,
5222         .align_mask = 0xff,
5223         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5224         .support_64bit_ptrs = true,
5225         .vmhub = AMDGPU_GFXHUB,
5226         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
5227         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
5228         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
5229         .emit_frame_size =
5230                 20 + /* gfx_v9_0_ring_emit_gds_switch */
5231                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
5232                 5 + /* hdp invalidate */
5233                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
5234                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5235                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5236                 2 + /* gfx_v9_0_ring_emit_vm_flush */
5237                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
5238         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
5239         .emit_ib = gfx_v9_0_ring_emit_ib_compute,
5240         .emit_fence = gfx_v9_0_ring_emit_fence,
5241         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
5242         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
5243         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
5244         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
5245         .test_ring = gfx_v9_0_ring_test_ring,
5246         .test_ib = gfx_v9_0_ring_test_ib,
5247         .insert_nop = amdgpu_ring_insert_nop,
5248         .pad_ib = amdgpu_ring_generic_pad_ib,
5249         .set_priority = gfx_v9_0_ring_set_priority_compute,
5250         .emit_wreg = gfx_v9_0_ring_emit_wreg,
5251         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5252         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5253 };
5254
5255 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
5256         .type = AMDGPU_RING_TYPE_KIQ,
5257         .align_mask = 0xff,
5258         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5259         .support_64bit_ptrs = true,
5260         .vmhub = AMDGPU_GFXHUB,
5261         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
5262         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
5263         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
5264         .emit_frame_size =
5265                 20 + /* gfx_v9_0_ring_emit_gds_switch */
5266                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
5267                 5 + /* hdp invalidate */
5268                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
5269                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5270                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5271                 2 + /* gfx_v9_0_ring_emit_vm_flush */
5272                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
5273         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
5274         .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
5275         .test_ring = gfx_v9_0_ring_test_ring,
5276         .insert_nop = amdgpu_ring_insert_nop,
5277         .pad_ib = amdgpu_ring_generic_pad_ib,
5278         .emit_rreg = gfx_v9_0_ring_emit_rreg,
5279         .emit_wreg = gfx_v9_0_ring_emit_wreg,
5280         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5281         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5282 };
5283
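     /* Hook up the ring function tables for the KIQ, gfx and compute rings. */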
5284 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
5285 {
5286         int i;
5287
5288         adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
5289
5290         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5291                 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
5292
5293         for (i = 0; i < adev->gfx.num_compute_rings; i++)
5294                 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
5295 }
5296
5297 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
5298         .set = gfx_v9_0_set_eop_interrupt_state,
5299         .process = gfx_v9_0_eop_irq,
5300 };
5301
5302 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
5303         .set = gfx_v9_0_set_priv_reg_fault_state,
5304         .process = gfx_v9_0_priv_reg_irq,
5305 };
5306
5307 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
5308         .set = gfx_v9_0_set_priv_inst_fault_state,
5309         .process = gfx_v9_0_priv_inst_irq,
5310 };
5311
5312 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
5313         .set = gfx_v9_0_set_cp_ecc_error_state,
5314         .process = gfx_v9_0_cp_ecc_error_irq,
5315 };
5316
5317
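     /* Register the interrupt source callbacks: EOP, priv reg/inst faults and CP ECC errors. */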
5318 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
5319 {
5320         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5321         adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
5322
5323         adev->gfx.priv_reg_irq.num_types = 1;
5324         adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
5325
5326         adev->gfx.priv_inst_irq.num_types = 1;
5327         adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
5328
5329         adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
5330         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
5331 }
5332
5333 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
5334 {
5335         switch (adev->asic_type) {
5336         case CHIP_VEGA10:
5337         case CHIP_VEGA12:
5338         case CHIP_VEGA20:
5339         case CHIP_RAVEN:
5340                 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
5341                 break;
5342         default:
5343                 break;
5344         }
5345 }
5346
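     /* Set the per-ASIC GDS size, max compute wave id and the GWS/OA sizes. */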
5347 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
5348 {
5349         /* init asic gds info */
5350         switch (adev->asic_type) {
5351         case CHIP_VEGA10:
5352         case CHIP_VEGA12:
5353         case CHIP_VEGA20:
5354                 adev->gds.gds_size = 0x10000;
5355                 break;
5356         case CHIP_RAVEN:
5357                 adev->gds.gds_size = 0x1000;
5358                 break;
5359         default:
5360                 adev->gds.gds_size = 0x10000;
5361                 break;
5362         }
5363
5364         switch (adev->asic_type) {
5365         case CHIP_VEGA10:
5366         case CHIP_VEGA20:
5367                 adev->gds.gds_compute_max_wave_id = 0x7ff;
5368                 break;
5369         case CHIP_VEGA12:
5370                 adev->gds.gds_compute_max_wave_id = 0x27f;
5371                 break;
5372         case CHIP_RAVEN:
5373                 if (adev->rev_id >= 0x8)
5374                         adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
5375                 else
5376                         adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
5377                 break;
5378         default:
5379                 /* this really depends on the chip */
5380                 adev->gds.gds_compute_max_wave_id = 0x7ff;
5381                 break;
5382         }
5383
5384         adev->gds.gws_size = 64;
5385         adev->gds.oa_size = 16;
5386 }
5387
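     /* Program the user-requested inactive CU bitmap for the currently selected SE/SH. */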
5388 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
5389                                                  u32 bitmap)
5390 {
5391         u32 data;
5392
5393         if (!bitmap)
5394                 return;
5395
5396         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5397         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5398
5399         WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
5400 }
5401
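     /* Return the active CU bitmap for the currently selected SE/SH, masking out
      * both fused-off and user-disabled CUs.
      */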
5402 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
5403 {
5404         u32 data, mask;
5405
5406         data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
5407         data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
5408
5409         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5410         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5411
5412         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
5413
5414         return (~data) & mask;
5415 }
5416
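     /* Walk every SE/SH, apply the user CU disable masks and fill cu_info with the
      * active CU count and per-SH bitmaps.
      */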
5417 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
5418                                  struct amdgpu_cu_info *cu_info)
5419 {
5420         int i, j, k, counter, active_cu_number = 0;
5421         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5422         unsigned disable_masks[4 * 2];
5423
5424         if (!adev || !cu_info)
5425                 return -EINVAL;
5426
5427         amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
5428
5429         mutex_lock(&adev->grbm_idx_mutex);
5430         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5431                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5432                         mask = 1;
5433                         ao_bitmap = 0;
5434                         counter = 0;
5435                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
5436                         if (i < 4 && j < 2)
5437                                 gfx_v9_0_set_user_cu_inactive_bitmap(
5438                                         adev, disable_masks[i * 2 + j]);
5439                         bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
5440                         cu_info->bitmap[i][j] = bitmap;
5441
5442                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
5443                                 if (bitmap & mask) {
5444                                         if (counter < adev->gfx.config.max_cu_per_sh)
5445                                                 ao_bitmap |= mask;
5446                                         counter++;
5447                                 }
5448                                 mask <<= 1;
5449                         }
5450                         active_cu_number += counter;
5451                         if (i < 2 && j < 2)
5452                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5453                         cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
5454                 }
5455         }
5456         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5457         mutex_unlock(&adev->grbm_idx_mutex);
5458
5459         cu_info->number = active_cu_number;
5460         cu_info->ao_cu_mask = ao_cu_mask;
5461         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
5462
5463         return 0;
5464 }
5465
5466 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
5467 {
5468         .type = AMD_IP_BLOCK_TYPE_GFX,
5469         .major = 9,
5470         .minor = 0,
5471         .rev = 0,
5472         .funcs = &gfx_v9_0_ip_funcs,
5473 };