/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"
#define GFX9_NUM_GFX_RINGS     1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmPWR_MISC_CNTL_STATUS					0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L
MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
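
/*
 * "Golden" register settings: per-ASIC register overrides applied once at
 * init. Each SOC15_REG_GOLDEN_VALUE(ip, inst, reg, and_mask, or_mask) entry
 * asks soc15_program_register_sequence() to read-modify-write the register:
 * the bits selected by and_mask are cleared and replaced with or_mask (an
 * and_mask of 0xffffffff writes or_mask directly).
 */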
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};
static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};
static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};
static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};
static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};
static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};
static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};
static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};
static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};
static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};
static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};
#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
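
/*
 * Program the per-ASIC golden register sequences. The ASIC-specific lists
 * are applied first; the common gc_9_x list is applied to every ASIC after
 * the switch. On VEGA10 the sequence is skipped under virtualization when
 * the host is expected to have programmed these registers already.
 */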
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		if (!amdgpu_virt_support_skip_setting(adev)) {
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_0,
							ARRAY_SIZE(golden_settings_gc_9_0));
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_0_vg10,
							ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		}
		break;
	case CHIP_VEGA12:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case CHIP_VEGA20:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case CHIP_RAVEN:
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->rev_id >= 8)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	default:
		break;
	}

	soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
					(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}
static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}
static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}
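
/*
 * Emit a WAIT_REG_MEM packet: stall the selected engine until a register
 * (mem_space = 0) or a dword-aligned memory location (mem_space = 1) equals
 * ref under mask, i.e. until (value & mask) == (ref & mask); inv is the
 * poll interval between checks.
 */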
static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
			   WAIT_REG_MEM_FUNCTION(3) |  /* equal */
			   WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}
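
/*
 * Basic ring sanity test: seed a scratch register with 0xCAFEDEAD, submit a
 * SET_UCONFIG_REG packet that stores 0xDEADBEEF there, then poll until the
 * new value appears (or the timeout expires), proving the CP consumed the
 * ring contents.
 */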
static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r)
		return r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		goto error_free_scratch;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

error_free_scratch:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
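
/*
 * Indirect-buffer test: same idea as the ring test above, but the
 * 0xDEADBEEF store is carried in an IB via a WRITE_DATA packet targeting a
 * writeback slot, and the result is checked after the submission fence
 * signals, exercising the full IB scheduling path.
 */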
static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}
497 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
499 release_firmware(adev->gfx.pfp_fw);
500 adev->gfx.pfp_fw = NULL;
501 release_firmware(adev->gfx.me_fw);
502 adev->gfx.me_fw = NULL;
503 release_firmware(adev->gfx.ce_fw);
504 adev->gfx.ce_fw = NULL;
505 release_firmware(adev->gfx.rlc_fw);
506 adev->gfx.rlc_fw = NULL;
507 release_firmware(adev->gfx.mec_fw);
508 adev->gfx.mec_fw = NULL;
509 release_firmware(adev->gfx.mec2_fw);
510 adev->gfx.mec2_fw = NULL;
512 kfree(adev->gfx.rlc.register_list_format);
static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_1 *rlc_hdr;

	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}
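
/*
 * Firmware-version gating for the combined write-then-wait fast path.
 * Older ME/PFP/MEC firmware must fall back to separate write and
 * wait_reg_mem packets; the per-ASIC checks below flip the *_fw_write_wait
 * flags once the firmware is known to be new enough.
 */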
static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
	adev->gfx.me_fw_write_wait = false;
	adev->gfx.mec_fw_write_wait = false;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000193) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_VEGA12:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000196) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_VEGA20:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000197) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_RAVEN:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000192) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	default:
		break;
	}
}
static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
			break;
		if ((adev->gfx.rlc_fw_version != 106 &&
		     adev->gfx.rlc_fw_version < 531) ||
		    (adev->gfx.rlc_fw_version == 53815) ||
		    (adev->gfx.rlc_feature_version < 1) ||
		    !adev->gfx.rlc.is_rlc_v2_1)
			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
		break;
	default:
		break;
	}
}
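
/*
 * Fetch and validate all gfx9 firmware images (PFP, ME, CE, RLC, MEC and
 * the optional MEC2) for the detected chip, parse their headers into
 * adev->gfx, and, when PSP front-door loading is in use, register each
 * image in adev->firmware.ucode[] so the PSP can load it.
 */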
static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL;
	unsigned int i = 0;
	uint16_t version_major;
	uint16_t version_minor;
	uint32_t smu_version;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	case CHIP_VEGA12:
		chip_name = "vega12";
		break;
	case CHIP_VEGA20:
		chip_name = "vega20";
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 8)
			chip_name = "raven2";
		else if (adev->pdev->device == 0x15d8)
			chip_name = "picasso";
		else
			chip_name = "raven";
		break;
	default:
		BUG();
	}
	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
	/*
	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
	 * instead of picasso_rlc.bin.
	 * Judgment method:
	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
	 *          or revision >= 0xD8 && revision <= 0xDF
	 * otherwise is PCO FP5
	 */
	if (!strcmp(chip_name, "picasso") &&
		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
		(smu_version >= 0x41e2b))
		/*
		 * SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
		 */
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
	else
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;

	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
	if (version_major == 2 && version_minor == 1)
		adev->gfx.rlc.is_rlc_v2_1 = true;

	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->gfx.rlc.is_rlc_v2_1)
		gfx_v9_0_init_rlc_ext_microcode(adev);
	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
	if (!err) {
		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
		if (err)
			goto out;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
		adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
		adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
	} else {
		err = 0;
		adev->gfx.mec2_fw = NULL;
	}
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		if (adev->gfx.rlc.is_rlc_v2_1 &&
		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
		}
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
		info->fw = adev->gfx.mec_fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
			info->fw = adev->gfx.mec2_fw;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
		}
	}

out:
	gfx_v9_0_check_if_need_gfxoff(adev);
	gfx_v9_0_check_fw_write_wait(adev);
	if (err) {
		dev_err(adev->dev,
			"gfx9: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}
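
/*
 * Clear-state buffer (CSB) helpers: gfx_v9_0_get_csb_size() walks
 * gfx9_cs_data to size the buffer, and gfx_v9_0_get_csb_buffer() emits the
 * matching PREAMBLE / CONTEXT_CONTROL / SET_CONTEXT_REG / CLEAR_STATE
 * packet stream into it; the two must stay in sync.
 */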
static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}

	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}
static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
{
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	uint32_t pg_always_on_cu_num = 2;
	uint32_t always_on_cu_num;
	uint32_t i, j, k;
	uint32_t mask, cu_bitmap, counter;

	if (adev->flags & AMD_IS_APU)
		always_on_cu_num = 4;
	else if (adev->asic_type == CHIP_VEGA12)
		always_on_cu_num = 8;
	else
		always_on_cu_num = 12;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			cu_bitmap = 0;
			counter = 0;
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (cu_info->bitmap[i][j] & mask) {
					if (counter == pg_always_on_cu_num)
						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
					if (counter < always_on_cu_num)
						cu_bitmap |= mask;
					else
						break;
					counter++;
				}
				mask <<= 1;
			}

			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
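
/*
 * Load-balancing per watt (LBPW) setup. The routines below program the
 * RLC_LB_* thresholds and sampling parameters and rely on the always-on CU
 * mask computed above; Raven (gfx_v9_0_init_lbpw) and Vega20
 * (gfx_v9_4_init_lbpw) use different tuning values.
 */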
static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
{
	uint32_t data;

	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));

	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);

	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);

	mutex_lock(&adev->grbm_idx_mutex);
	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);

	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);

	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
	data &= 0x0000FFFF;
	data |= 0x00C00000;
	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);

	/*
	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
	 * programmed in gfx_v9_0_init_always_on_cu_mask()
	 */

	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
	 * but used for RLC_LB_CNTL configuration */
	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	gfx_v9_0_init_always_on_cu_mask(adev);
}
static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
{
	uint32_t data;

	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));

	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);

	/* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);

	mutex_lock(&adev->grbm_idx_mutex);
	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);

	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);

	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
	data &= 0x0000FFFF;
	data |= 0x00C00000;
	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);

	/*
	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
	 * programmed in gfx_v9_0_init_always_on_cu_mask()
	 */

	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
	 * but used for RLC_LB_CNTL configuration */
	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	gfx_v9_0_init_always_on_cu_mask(adev);
}
static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
}

static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
{
	return 5;
}
static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
{
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = gfx9_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* init clear state block */
		r = amdgpu_gfx_rlc_init_csb(adev);
		if (r)
			return r;
	}

	if (adev->asic_type == CHIP_RAVEN) {
		/* TODO: double check the cp_table_size for RV */
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_gfx_rlc_init_cpt(adev);
		if (r)
			return r;
	}

	switch (adev->asic_type) {
	case CHIP_RAVEN:
		gfx_v9_0_init_lbpw(adev);
		break;
	case CHIP_VEGA20:
		gfx_v9_4_init_lbpw(adev);
		break;
	default:
		break;
	}

	return 0;
}
static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
{
	int r;

	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
	if (unlikely(r != 0))
		return r;

	r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
			AMDGPU_GEM_DOMAIN_VRAM);
	if (!r)
		adev->gfx.rlc.clear_state_gpu_addr =
			amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);

	amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);

	return r;
}
static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
{
	int r;

	if (!adev->gfx.rlc.clear_state_obj)
		return;

	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
	if (likely(r == 0)) {
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}
}
static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
}
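
/*
 * Allocate the MEC resources: one GFX9_MEC_HPD_SIZE EOP buffer slice per
 * enabled compute ring in VRAM, plus a GTT buffer object holding a CPU-side
 * copy of the MEC firmware image.
 */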
static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	const __le32 *fw_data;
	unsigned fw_size;
	u32 *fw;
	size_t mec_hpd_size;

	const struct gfx_firmware_header_v1_0 *mec_hdr;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);
	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;

	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
		gfx_v9_0_mec_fini(adev);
		return r;
	}

	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.mec_fw_obj,
				      &adev->gfx.mec.mec_fw_gpu_addr,
				      (void **)&fw);
	if (r) {
		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
		gfx_v9_0_mec_fini(adev);
		return r;
	}

	memcpy(fw, fw_data, fw_size);

	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);

	return 0;
}
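
/*
 * Wavefront debug helpers: SQ_IND_INDEX/SQ_IND_DATA form an indirect
 * register pair for reading live wave state (status, PC, EXEC mask, GPRs)
 * from a given SIMD/wave; AUTO_INCR lets consecutive registers stream out
 * of SQ_IND_DATA. These back the debugfs/umr wave-dump interfaces.
 */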
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}
static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
			   uint32_t wave, uint32_t thread,
			   uint32_t regno, uint32_t num, uint32_t *out)
{
	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}
static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* type 1 wave data */
	dst[(*no_fields)++] = 1;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}
static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, 0,
		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}

static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t thread,
				     uint32_t start, uint32_t size,
				     uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, thread,
		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
}
static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
				      u32 me, u32 pipe, u32 q)
{
	soc15_grbm_select(adev, me, pipe, q, 0);
}
static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v9_0_select_se_sh,
	.read_wave_data = &gfx_v9_0_read_wave_data,
	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
};
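
/*
 * Early gfx config: pick per-ASIC FIFO sizes and the GB_ADDR_CONFIG
 * "golden" value, then decode its bitfields (pipes, banks, RBs, shader
 * engines, pipe interleave) into adev->gfx.config for the rest of the
 * driver to consume.
 */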
static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	int err;

	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VEGA12:
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
		DRM_INFO("fix gfx.config for vega12\n");
		break;
	case CHIP_VEGA20:
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
		gb_addr_config &= ~0xf3e777ff;
		gb_addr_config |= 0x22014042;
		/* check vbios table if gpu info is not available */
		err = amdgpu_atomfirmware_get_gfx_info(adev);
		if (err)
			return err;
		break;
	case CHIP_RAVEN:
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		if (adev->rev_id >= 8)
			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
		else
			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		BUG();
		break;
	}

	adev->gfx.config.gb_addr_config = gb_addr_config;

	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_PIPES);

	adev->gfx.config.max_tile_pipes =
		adev->gfx.config.gb_addr_config_fields.num_pipes;

	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_BANKS);
	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					MAX_COMPRESSED_FRAGS);
	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_RB_PER_SE);
	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_SHADER_ENGINES);
	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					PIPE_INTERLEAVE_SIZE));

	return 0;
}
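
/*
 * Next-generation geometry (NGG) buffers: per-shader-engine VRAM buffers
 * for primitives, positions, control sideband and (optionally) the
 * parameter cache, plus a small GDS reservation. Sizes come from the
 * amdgpu_*_per_se module parameters, with built-in per-SE defaults.
 */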
static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
				   struct amdgpu_ngg_buf *ngg_buf,
				   int size_se,
				   int default_size_se)
{
	int r;

	if (size_se < 0) {
		dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
		return -EINVAL;
	}
	size_se = size_se ? size_se : default_size_se;

	ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
	r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
				    &ngg_buf->bo,
				    &ngg_buf->gpu_addr,
				    NULL);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
		return r;
	}
	ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);

	return r;
}
static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < NGG_BUF_MAX; i++)
		amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
				      &adev->gfx.ngg.buf[i].gpu_addr,
				      NULL);

	memset(&adev->gfx.ngg.buf[0], 0,
			sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);

	adev->gfx.ngg.init = false;

	return 0;
}
static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
{
	int r;

	if (!amdgpu_ngg || adev->gfx.ngg.init == true)
		return 0;

	/* GDS reserve memory: 64 bytes alignment */
	adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
	adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
	adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
	adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);

	/* Primitive Buffer */
	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
				    amdgpu_prim_buf_per_se,
				    64 * 1024);
	if (r) {
		dev_err(adev->dev, "Failed to create Primitive Buffer\n");
		goto err;
	}

	/* Position Buffer */
	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
				    amdgpu_pos_buf_per_se,
				    256 * 1024);
	if (r) {
		dev_err(adev->dev, "Failed to create Position Buffer\n");
		goto err;
	}

	/* Control Sideband */
	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
				    amdgpu_cntl_sb_buf_per_se,
				    256);
	if (r) {
		dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
		goto err;
	}

	/* Parameter Cache, not created by default */
	if (amdgpu_param_buf_per_se <= 0)
		goto out;

	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
				    amdgpu_param_buf_per_se,
				    512 * 1024);
	if (r) {
		dev_err(adev->dev, "Failed to create Parameter Cache\n");
		goto err;
	}

out:
	adev->gfx.ngg.init = true;
	return 0;
err:
	gfx_v9_0_ngg_fini(adev);
	return r;
}
static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	int r;
	u32 data, base;

	if (!amdgpu_ngg)
		return 0;

	/* Program buffer size */
	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
			     adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
			     adev->gfx.ngg.buf[NGG_POS].size >> 8);
	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);

	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
			     adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
			     adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);

	/* Program buffer base address */
	base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);

	base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);

	base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
	data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);

	base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
	data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);

	base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);

	base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);

	/* Clear GDS reserved memory */
	r = amdgpu_ring_alloc(ring, 17);
	if (r) {
		DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
			  ring->name, r);
		return r;
	}

	gfx_v9_0_write_data_to_reg(ring, 0, false,
				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
				   (adev->gds.gds_size +
				    adev->gfx.ngg.gds_reserve_size));

	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
				PACKET3_DMA_DATA_DST_SEL(1) |
				PACKET3_DMA_DATA_SRC_SEL(2)));
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
				adev->gfx.ngg.gds_reserve_size);

	gfx_v9_0_write_data_to_reg(ring, 0, false,
				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);

	amdgpu_ring_commit(ring);

	return 0;
}
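
/*
 * Initialize one compute ring: map it onto a MEC/pipe/queue triple, point
 * it at its EOP buffer slice and doorbell, and attach the per-pipe EOP
 * interrupt source.
 */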
static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
				      int mec, int pipe, int queue)
{
	int r;
	unsigned irq_type;
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
				+ (ring_id * GFX9_MEC_HPD_SIZE);
	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		+ ring->pipe;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	r = amdgpu_ring_init(adev, ring, 1024,
			     &adev->gfx.eop_irq, irq_type);
	if (r)
		return r;

	return 0;
}
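
/*
 * sw_init: register the CP interrupt sources, load microcode, create the
 * RLC, MEC, KIQ and MQD objects, and initialize the gfx and compute rings
 * (compute queues are allocated horizontally across pipes).
 */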
1644 static int gfx_v9_0_sw_init(void *handle)
1646 int i, j, k, r, ring_id;
1647 struct amdgpu_ring *ring;
1648 struct amdgpu_kiq *kiq;
1649 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1651 switch (adev->asic_type) {
1656 adev->gfx.mec.num_mec = 2;
1659 adev->gfx.mec.num_mec = 1;
1663 adev->gfx.mec.num_pipe_per_mec = 4;
1664 adev->gfx.mec.num_queue_per_pipe = 8;
1667 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
1671 /* Privileged reg */
1672 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
1673 &adev->gfx.priv_reg_irq);
1677 /* Privileged inst */
1678 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
1679 &adev->gfx.priv_inst_irq);
1684 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
1685 &adev->gfx.cp_ecc_error_irq);
1690 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
1691 &adev->gfx.cp_ecc_error_irq);
1695 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1697 gfx_v9_0_scratch_init(adev);
1699 r = gfx_v9_0_init_microcode(adev);
1701 DRM_ERROR("Failed to load gfx firmware!\n");
1705 r = adev->gfx.rlc.funcs->init(adev);
1707 DRM_ERROR("Failed to init rlc BOs!\n");
1711 r = gfx_v9_0_mec_init(adev);
1713 DRM_ERROR("Failed to init MEC BOs!\n");
1717 /* set up the gfx ring */
1718 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1719 ring = &adev->gfx.gfx_ring[i];
1720 ring->ring_obj = NULL;
1721 if (!i)
1722 sprintf(ring->name, "gfx");
1723 else
1724 sprintf(ring->name, "gfx_%d", i);
1725 ring->use_doorbell = true;
1726 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
1727 r = amdgpu_ring_init(adev, ring, 1024,
1728 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
1733 /* set up the compute queues - allocate horizontally across pipes */
1735 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1736 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1737 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1738 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
1741 r = gfx_v9_0_compute_ring_init(adev,
1752 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
1754 DRM_ERROR("Failed to init KIQ BOs!\n");
1758 kiq = &adev->gfx.kiq;
1759 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
1763 /* create MQDs for all compute queues as well as the KIQ for the SRIOV case */
1764 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
1768 adev->gfx.ce_ram_size = 0x8000;
1770 r = gfx_v9_0_gpu_early_init(adev);
1774 r = gfx_v9_0_ngg_init(adev);
1782 static int gfx_v9_0_sw_fini(void *handle)
1785 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1787 if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
1789 struct ras_common_if *ras_if = adev->gfx.ras_if;
1790 struct ras_ih_if ih_info = {
1794 amdgpu_ras_debugfs_remove(adev, ras_if);
1795 amdgpu_ras_sysfs_remove(adev, ras_if);
1796 amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
1797 amdgpu_ras_feature_enable(adev, ras_if, 0);
1801 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1802 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1803 for (i = 0; i < adev->gfx.num_compute_rings; i++)
1804 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1806 amdgpu_gfx_mqd_sw_fini(adev);
1807 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
1808 amdgpu_gfx_kiq_fini(adev);
1810 gfx_v9_0_mec_fini(adev);
1811 gfx_v9_0_ngg_fini(adev);
1812 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1813 if (adev->asic_type == CHIP_RAVEN) {
1814 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
1815 &adev->gfx.rlc.cp_table_gpu_addr,
1816 (void **)&adev->gfx.rlc.cp_table_ptr);
1818 gfx_v9_0_free_microcode(adev);
1824 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
1829 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
1833 if (instance == 0xffffffff)
1834 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
1836 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
1838 if (se_num == 0xffffffff)
1839 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
1841 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1843 if (sh_num == 0xffffffff)
1844 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
1846 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
1848 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
1851 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1855 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
1856 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
1858 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1859 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1861 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1862 adev->gfx.config.max_sh_per_se);
1864 return (~data) & mask;
1867 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
1872 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1873 adev->gfx.config.max_sh_per_se;
1875 mutex_lock(&adev->grbm_idx_mutex);
1876 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1877 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1878 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1879 data = gfx_v9_0_get_rb_active_bitmap(adev);
1880 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1881 rb_bitmap_width_per_sh);
1884 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1885 mutex_unlock(&adev->grbm_idx_mutex);
1887 adev->gfx.config.backend_enable_mask = active_rbs;
1888 adev->gfx.config.num_rbs = hweight32(active_rbs);
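/*
 * VMIDs 8..15 are reserved for compute queues (e.g. for KFD).  Each of
 * them gets the fixed apertures described in the comment below;
 * SH_MEM_BASES packs the two aperture bases into the two 16-bit halves
 * of the register, hence DEFAULT_SH_MEM_BASES being replicated into
 * both.
 */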
1891 #define DEFAULT_SH_MEM_BASES (0x6000)
1892 #define FIRST_COMPUTE_VMID (8)
1893 #define LAST_COMPUTE_VMID (16)
1894 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
1897 uint32_t sh_mem_config;
1898 uint32_t sh_mem_bases;
1901 * Configure apertures:
1902 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
1903 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
1904 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (256TB)
1906 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
1908 sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
1909 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
1910 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
1912 mutex_lock(&adev->srbm_mutex);
1913 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1914 soc15_grbm_select(adev, 0, 0, 0, i);
1915 /* CP and shaders */
1916 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
1917 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
1919 soc15_grbm_select(adev, 0, 0, 0, 0);
1920 mutex_unlock(&adev->srbm_mutex);
1923 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
1928 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
1930 gfx_v9_0_tiling_mode_table_init(adev);
1932 gfx_v9_0_setup_rb(adev);
1933 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
1934 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
1936 /* XXX SH_MEM regs */
1937 /* where to put LDS, scratch, GPUVM in FSA64 space */
1938 mutex_lock(&adev->srbm_mutex);
1939 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) {
1940 soc15_grbm_select(adev, 0, 0, 0, i);
1941 /* CP and shaders */
1942 if (i == 0) {
1943 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1944 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1945 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
1946 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
1947 } else {
1948 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1949 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1950 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
1951 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
1952 (adev->gmc.private_aperture_start >> 48));
1953 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
1954 (adev->gmc.shared_aperture_start >> 48));
1955 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
1956 }
1958 soc15_grbm_select(adev, 0, 0, 0, 0);
1960 mutex_unlock(&adev->srbm_mutex);
1962 gfx_v9_0_init_compute_vmid(adev);
1965 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
1970 mutex_lock(&adev->grbm_idx_mutex);
1971 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1972 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1973 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1974 for (k = 0; k < adev->usec_timeout; k++) {
1975 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
1979 if (k == adev->usec_timeout) {
1980 gfx_v9_0_select_se_sh(adev, 0xffffffff,
1981 0xffffffff, 0xffffffff);
1982 mutex_unlock(&adev->grbm_idx_mutex);
1983 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
1989 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1990 mutex_unlock(&adev->grbm_idx_mutex);
1992 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
1993 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
1994 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
1995 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
1996 for (k = 0; k < adev->usec_timeout; k++) {
1997 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2003 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2006 u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2008 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2009 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2010 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2011 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2013 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2016 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2019 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2020 adev->gfx.rlc.clear_state_gpu_addr >> 32);
2021 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2022 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2023 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2024 adev->gfx.rlc.clear_state_size);
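/*
 * The RLC register-list format produced by the firmware is a direct
 * register portion followed by indirect blocks, each of which is a
 * sequence of (offset, value) pairs terminated by a 0xFFFFFFFF marker.
 * The parser below records where every indirect block starts and
 * collects the set of unique indirect registers it references.
 */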
2027 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2028 int indirect_offset,
2030 int *unique_indirect_regs,
2031 int unique_indirect_reg_count,
2032 int *indirect_start_offsets,
2033 int *indirect_start_offsets_count,
2034 int max_start_offsets_count)
2038 for (; indirect_offset < list_size; indirect_offset++) {
2039 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2040 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2041 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2043 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2044 indirect_offset += 2;
2046 /* look for the matching index */
2047 for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2048 if (unique_indirect_regs[idx] ==
2049 register_list_format[indirect_offset] ||
2050 !unique_indirect_regs[idx])
2054 BUG_ON(idx >= unique_indirect_reg_count);
2056 if (!unique_indirect_regs[idx])
2057 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2064 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2066 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2067 int unique_indirect_reg_count = 0;
2069 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2070 int indirect_start_offsets_count = 0;
2076 u32 *register_list_format =
2077 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2078 if (!register_list_format)
2080 memcpy(register_list_format, adev->gfx.rlc.register_list_format,
2081 adev->gfx.rlc.reg_list_format_size_bytes);
2083 /* setup unique_indirect_regs array and indirect_start_offsets array */
2084 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2085 gfx_v9_1_parse_ind_reg_list(register_list_format,
2086 adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2087 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2088 unique_indirect_regs,
2089 unique_indirect_reg_count,
2090 indirect_start_offsets,
2091 &indirect_start_offsets_count,
2092 ARRAY_SIZE(indirect_start_offsets));
2094 /* enable auto inc in case it is disabled */
2095 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2096 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2097 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2099 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2100 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2101 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2102 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2103 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2104 adev->gfx.rlc.register_restore[i]);
2106 /* load indirect register */
2107 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2108 adev->gfx.rlc.reg_list_format_start);
2110 /* direct register portion */
2111 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2112 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2113 register_list_format[i]);
2115 /* indirect register portion */
2116 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2117 if (register_list_format[i] == 0xFFFFFFFF) {
2118 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2122 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2123 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2125 for (j = 0; j < unique_indirect_reg_count; j++) {
2126 if (register_list_format[i] == unique_indirect_regs[j]) {
2127 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2132 BUG_ON(j >= unique_indirect_reg_count);
2137 /* set save/restore list size */
2138 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2139 list_size = list_size >> 1;
2140 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2141 adev->gfx.rlc.reg_restore_list_size);
2142 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2144 /* write the starting offsets to RLC scratch ram */
2145 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2146 adev->gfx.rlc.starting_offsets_start);
2147 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2148 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2149 indirect_start_offsets[i]);
2151 /* load unique indirect regs */
2152 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2153 if (unique_indirect_regs[i] != 0) {
2154 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2155 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2156 unique_indirect_regs[i] & 0x3FFFF);
2158 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2159 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2160 unique_indirect_regs[i] >> 20);
2164 kfree(register_list_format);
2168 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2170 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2173 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2177 uint32_t default_data = 0;
2179 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2180 if (enable) {
2181 /* enable GFXIP control over CGPG */
2182 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2183 if (default_data != data)
2184 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2187 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2188 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2189 if (default_data != data)
2190 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2192 /* restore GFXIP control over CGPG */
2193 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2194 if (default_data != data)
2195 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2199 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2203 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2204 AMD_PG_SUPPORT_GFX_SMG |
2205 AMD_PG_SUPPORT_GFX_DMG)) {
2206 /* init IDLE_POLL_COUNT = 60 */
2207 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2208 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2209 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2210 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2212 /* init RLC PG Delay */
2214 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2215 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2216 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2217 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2218 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2220 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2221 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2222 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2223 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2225 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2226 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2227 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2228 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2230 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2231 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2233 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2234 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2235 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2237 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2241 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2245 uint32_t default_data = 0;
2247 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2248 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2249 SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2251 if (default_data != data)
2252 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2255 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2259 uint32_t default_data = 0;
2261 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2262 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2263 SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2265 if (default_data != data)
2266 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2269 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2273 uint32_t default_data = 0;
2275 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2276 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2279 if (default_data != data)
2280 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2283 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2286 uint32_t data, default_data;
2288 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2289 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2290 GFX_POWER_GATING_ENABLE,
2292 if (default_data != data)
2293 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2296 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2299 uint32_t data, default_data;
2301 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2302 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2303 GFX_PIPELINE_PG_ENABLE,
2305 if (default_data != data)
2306 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2309 /* read any GFX register to wake up GFX */
2310 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2313 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2316 uint32_t data, default_data;
2318 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2319 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2320 STATIC_PER_CU_PG_ENABLE,
2322 if (default_data != data)
2323 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2326 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2329 uint32_t data, default_data;
2331 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2332 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2333 DYN_PER_CU_PG_ENABLE,
2335 if (default_data != data)
2336 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
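/*
 * Power-gating bring-up order matters here: the clear-state buffer is
 * programmed first, then (on RLC v2_1) the save/restore list and the
 * save/restore machine, and only afterwards the RLC jump table and the
 * per-feature PG controls.
 */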
2339 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2341 gfx_v9_0_init_csb(adev);
2344 * The RLC save/restore list is only available since RLC v2_1,
2345 * and it is required by the gfxoff feature.
2347 if (adev->gfx.rlc.is_rlc_v2_1) {
2348 gfx_v9_1_init_rlc_save_restore_list(adev);
2349 gfx_v9_0_enable_save_restore_machine(adev);
2352 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2353 AMD_PG_SUPPORT_GFX_SMG |
2354 AMD_PG_SUPPORT_GFX_DMG |
2356 AMD_PG_SUPPORT_GDS |
2357 AMD_PG_SUPPORT_RLC_SMU_HS)) {
2358 WREG32(mmRLC_JUMP_TABLE_RESTORE,
2359 adev->gfx.rlc.cp_table_gpu_addr >> 8);
2360 gfx_v9_0_init_gfx_power_gating(adev);
2364 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2366 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2367 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2368 gfx_v9_0_wait_for_rlc_serdes(adev);
2371 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2373 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2375 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2379 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2381 #ifdef AMDGPU_RLC_DEBUG_RETRY
2385 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2388 /* APUs (e.g. Carrizo) enable the CP interrupt only after the CP is initialized */
2389 if (!(adev->flags & AMD_IS_APU)) {
2390 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2394 #ifdef AMDGPU_RLC_DEBUG_RETRY
2395 /* RLC_GPM_GENERAL_6 : RLC Ucode version */
2396 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2397 if (rlc_ucode_ver == 0x108) {
2398 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2399 rlc_ucode_ver, adev->gfx.rlc_fw_version);
2400 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2401 * default is 0x9C4 to create a 100us interval */
2402 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2403 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2404 * to disable the page fault retry interrupts, default is 0x100 */
2406 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2411 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2413 const struct rlc_firmware_header_v2_0 *hdr;
2414 const __le32 *fw_data;
2415 unsigned i, fw_size;
2417 if (!adev->gfx.rlc_fw)
2420 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2421 amdgpu_ucode_print_rlc_hdr(&hdr->header);
2423 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2424 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2425 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2427 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2428 RLCG_UCODE_LOADING_START_ADDRESS);
2429 for (i = 0; i < fw_size; i++)
2430 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2431 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
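/*
 * RLC resume sequence: under SRIOV only the CSB needs to be
 * reprogrammed.  Otherwise the RLC is stopped and soft-reset, power
 * gating is reinitialized, the microcode is loaded by the driver when
 * PSP front-door loading is not in use, LBPW is configured per ASIC,
 * and finally the RLC is started.
 */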
2436 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2440 if (amdgpu_sriov_vf(adev)) {
2441 gfx_v9_0_init_csb(adev);
2445 adev->gfx.rlc.funcs->stop(adev);
2448 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2450 gfx_v9_0_init_pg(adev);
2452 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2453 /* legacy rlc firmware loading */
2454 r = gfx_v9_0_rlc_load_microcode(adev);
2459 switch (adev->asic_type) {
2461 if (amdgpu_lbpw == 0)
2462 gfx_v9_0_enable_lbpw(adev, false);
2464 gfx_v9_0_enable_lbpw(adev, true);
2467 if (amdgpu_lbpw > 0)
2468 gfx_v9_0_enable_lbpw(adev, true);
2470 gfx_v9_0_enable_lbpw(adev, false);
2476 adev->gfx.rlc.funcs->start(adev);
2481 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2484 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2486 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2487 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2488 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2490 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2491 adev->gfx.gfx_ring[i].sched.ready = false;
2493 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2497 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2499 const struct gfx_firmware_header_v1_0 *pfp_hdr;
2500 const struct gfx_firmware_header_v1_0 *ce_hdr;
2501 const struct gfx_firmware_header_v1_0 *me_hdr;
2502 const __le32 *fw_data;
2503 unsigned i, fw_size;
2505 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2508 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2509 adev->gfx.pfp_fw->data;
2510 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2511 adev->gfx.ce_fw->data;
2512 me_hdr = (const struct gfx_firmware_header_v1_0 *)
2513 adev->gfx.me_fw->data;
2515 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2516 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2517 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2519 gfx_v9_0_cp_gfx_enable(adev, false);
2522 fw_data = (const __le32 *)
2523 (adev->gfx.pfp_fw->data +
2524 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2525 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2526 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
2527 for (i = 0; i < fw_size; i++)
2528 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2529 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2532 fw_data = (const __le32 *)
2533 (adev->gfx.ce_fw->data +
2534 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2535 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2536 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
2537 for (i = 0; i < fw_size; i++)
2538 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2539 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2542 fw_data = (const __le32 *)
2543 (adev->gfx.me_fw->data +
2544 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2545 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2546 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
2547 for (i = 0; i < fw_size; i++)
2548 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2549 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
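/*
 * Prime the gfx ring with the golden context: the clear-state sequence
 * is emitted between PREAMBLE_BEGIN/END_CLEAR_STATE markers, followed
 * by a CLEAR_STATE packet and the CE partition bases, so that new
 * contexts start from a known register state.
 */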
2554 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
2556 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2557 const struct cs_section_def *sect = NULL;
2558 const struct cs_extent_def *ext = NULL;
2562 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2563 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
2565 gfx_v9_0_cp_gfx_enable(adev, true);
2567 r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
2569 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2573 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2574 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2576 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2577 amdgpu_ring_write(ring, 0x80000000);
2578 amdgpu_ring_write(ring, 0x80000000);
2580 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
2581 for (ext = sect->section; ext->extent != NULL; ++ext) {
2582 if (sect->id == SECT_CONTEXT) {
2583 amdgpu_ring_write(ring,
2584 PACKET3(PACKET3_SET_CONTEXT_REG,
2586 amdgpu_ring_write(ring,
2587 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2588 for (i = 0; i < ext->reg_count; i++)
2589 amdgpu_ring_write(ring, ext->extent[i]);
2594 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2595 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2597 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2598 amdgpu_ring_write(ring, 0);
2600 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2601 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2602 amdgpu_ring_write(ring, 0x8000);
2603 amdgpu_ring_write(ring, 0x8000);
2605 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2606 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
2607 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
2608 amdgpu_ring_write(ring, tmp);
2609 amdgpu_ring_write(ring, 0);
2611 amdgpu_ring_commit(ring);
2616 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
2618 struct amdgpu_ring *ring;
2621 u64 rb_addr, rptr_addr, wptr_gpu_addr;
2623 /* Set the write pointer delay */
2624 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
2626 /* set the RB to use vmid 0 */
2627 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
2629 /* Set ring buffer size */
2630 ring = &adev->gfx.gfx_ring[0];
2631 rb_bufsz = order_base_2(ring->ring_size / 8);
2632 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
2633 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
2635 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
2637 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2639 /* Initialize the ring buffer's write pointers */
2641 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
2642 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
2644 /* set the wb address whether it's enabled or not */
2645 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2646 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
2647 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
2649 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2650 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
2651 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
2654 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2656 rb_addr = ring->gpu_addr >> 8;
2657 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
2658 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
2660 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
2661 if (ring->use_doorbell) {
2662 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2663 DOORBELL_OFFSET, ring->doorbell_index);
2664 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2667 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
2669 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
2671 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
2672 DOORBELL_RANGE_LOWER, ring->doorbell_index);
2673 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
2675 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
2676 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
2679 /* start the ring */
2680 gfx_v9_0_cp_gfx_start(adev);
2681 ring->sched.ready = true;
2686 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
2691 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
2693 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
2694 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
2695 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2696 adev->gfx.compute_ring[i].sched.ready = false;
2697 adev->gfx.kiq.ring.sched.ready = false;
2702 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
2704 const struct gfx_firmware_header_v1_0 *mec_hdr;
2705 const __le32 *fw_data;
2709 if (!adev->gfx.mec_fw)
2712 gfx_v9_0_cp_compute_enable(adev, false);
2714 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2715 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
2717 fw_data = (const __le32 *)
2718 (adev->gfx.mec_fw->data +
2719 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2721 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2722 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2723 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
2725 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
2726 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
2727 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
2728 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
2731 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2732 mec_hdr->jt_offset);
2733 for (i = 0; i < mec_hdr->jt_size; i++)
2734 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
2735 le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
2737 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2738 adev->gfx.mec_fw_version);
2739 /* TODO: loading MEC2 firmware is only necessary if MEC2 should run microcode different from MEC1's */
2745 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
2748 struct amdgpu_device *adev = ring->adev;
2750 /* tell RLC which is KIQ queue */
2751 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
2752 tmp &= 0xffffff00;
2753 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
2754 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2755 tmp |= 0x80;
2756 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
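/*
 * Map all enabled KCQs through the KIQ: a single SET_RESOURCES packet
 * (8 dwords) publishes the queue mask, then a 7-dword MAP_QUEUES packet
 * per compute ring hands its MQD and wptr addresses to the CP
 * scheduler; this is why the allocation below reserves
 * 7 * num_compute_rings + 8 dwords.
 */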
2759 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
2761 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
2762 uint64_t queue_mask = 0;
2765 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
2766 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
2769 /* This situation may be hit in the future if a new HW
2770 * generation exposes more than 64 queues. If so, the
2771 * definition of queue_mask needs updating */
2772 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
2773 DRM_ERROR("Invalid KCQ enabled: %d\n", i);
2777 queue_mask |= (1ull << i);
2780 r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
2782 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
2787 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
2788 amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
2789 PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
2790 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
2791 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
2792 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
2793 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
2794 amdgpu_ring_write(kiq_ring, 0); /* oac mask */
2795 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
2796 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2797 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
2798 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
2799 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2801 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
2803 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
2804 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
2805 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
2806 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
2807 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
2808 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
2809 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
2810 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
2811 PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
2812 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
2813 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
2814 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
2815 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
2816 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
2817 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
2820 r = amdgpu_ring_test_helper(kiq_ring);
2822 DRM_ERROR("KCQ enable failed\n");
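/*
 * Fill in the memory queue descriptor (MQD) for this ring.  The MQD is
 * the in-memory image of the CP_HQD_* queue registers: the CP loads it
 * when the queue is mapped, so the values programmed here mirror what
 * gfx_v9_0_kiq_init_register() writes directly for the KIQ.
 */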
2827 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
2829 struct amdgpu_device *adev = ring->adev;
2830 struct v9_mqd *mqd = ring->mqd_ptr;
2831 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
2834 mqd->header = 0xC0310800;
2835 mqd->compute_pipelinestat_enable = 0x00000001;
2836 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
2837 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
2838 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
2839 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
2840 mqd->compute_misc_reserved = 0x00000003;
2842 mqd->dynamic_cu_mask_addr_lo =
2843 lower_32_bits(ring->mqd_gpu_addr
2844 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2845 mqd->dynamic_cu_mask_addr_hi =
2846 upper_32_bits(ring->mqd_gpu_addr
2847 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2849 eop_base_addr = ring->eop_gpu_addr >> 8;
2850 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
2851 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
2853 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2854 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
2855 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
2856 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
2858 mqd->cp_hqd_eop_control = tmp;
2860 /* enable doorbell? */
2861 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2863 if (ring->use_doorbell) {
2864 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2865 DOORBELL_OFFSET, ring->doorbell_index);
2866 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2868 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2869 DOORBELL_SOURCE, 0);
2870 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2873 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2877 mqd->cp_hqd_pq_doorbell_control = tmp;
2879 /* disable the queue if it's active */
2881 mqd->cp_hqd_dequeue_request = 0;
2882 mqd->cp_hqd_pq_rptr = 0;
2883 mqd->cp_hqd_pq_wptr_lo = 0;
2884 mqd->cp_hqd_pq_wptr_hi = 0;
2886 /* set the pointer to the MQD */
2887 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
2888 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
2890 /* set MQD vmid to 0 */
2891 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
2892 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
2893 mqd->cp_mqd_control = tmp;
2895 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
2896 hqd_gpu_addr = ring->gpu_addr >> 8;
2897 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
2898 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
2900 /* set up the HQD, this is similar to CP_RB0_CNTL */
2901 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
2902 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
2903 (order_base_2(ring->ring_size / 4) - 1));
2904 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
2905 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
2907 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
2909 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
2910 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
2911 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
2912 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
2913 mqd->cp_hqd_pq_control = tmp;
2915 /* set the wb address whether it's enabled or not */
2916 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2917 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
2918 mqd->cp_hqd_pq_rptr_report_addr_hi =
2919 upper_32_bits(wb_gpu_addr) & 0xffff;
2921 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
2922 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2923 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
2924 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
2927 /* enable the doorbell if requested */
2928 if (ring->use_doorbell) {
2929 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2930 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2931 DOORBELL_OFFSET, ring->doorbell_index);
2933 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2935 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2936 DOORBELL_SOURCE, 0);
2937 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2941 mqd->cp_hqd_pq_doorbell_control = tmp;
2943 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2945 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
2947 /* set the vmid for the queue */
2948 mqd->cp_hqd_vmid = 0;
2950 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
2951 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
2952 mqd->cp_hqd_persistent_state = tmp;
2954 /* set MIN_IB_AVAIL_SIZE */
2955 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
2956 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
2957 mqd->cp_hqd_ib_control = tmp;
2959 /* activate the queue */
2960 mqd->cp_hqd_active = 1;
2965 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
2967 struct amdgpu_device *adev = ring->adev;
2968 struct v9_mqd *mqd = ring->mqd_ptr;
2971 /* disable wptr polling */
2972 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
2974 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
2975 mqd->cp_hqd_eop_base_addr_lo);
2976 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
2977 mqd->cp_hqd_eop_base_addr_hi);
2979 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2980 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
2981 mqd->cp_hqd_eop_control);
2983 /* enable doorbell? */
2984 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
2985 mqd->cp_hqd_pq_doorbell_control);
2987 /* disable the queue if it's active */
2988 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
2989 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
2990 for (j = 0; j < adev->usec_timeout; j++) {
2991 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
2995 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
2996 mqd->cp_hqd_dequeue_request);
2997 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
2998 mqd->cp_hqd_pq_rptr);
2999 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3000 mqd->cp_hqd_pq_wptr_lo);
3001 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3002 mqd->cp_hqd_pq_wptr_hi);
3005 /* set the pointer to the MQD */
3006 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3007 mqd->cp_mqd_base_addr_lo);
3008 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3009 mqd->cp_mqd_base_addr_hi);
3011 /* set MQD vmid to 0 */
3012 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3013 mqd->cp_mqd_control);
3015 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3016 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3017 mqd->cp_hqd_pq_base_lo);
3018 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3019 mqd->cp_hqd_pq_base_hi);
3021 /* set up the HQD, this is similar to CP_RB0_CNTL */
3022 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3023 mqd->cp_hqd_pq_control);
3025 /* set the wb address whether it's enabled or not */
3026 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3027 mqd->cp_hqd_pq_rptr_report_addr_lo);
3028 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3029 mqd->cp_hqd_pq_rptr_report_addr_hi);
3031 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3032 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3033 mqd->cp_hqd_pq_wptr_poll_addr_lo);
3034 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3035 mqd->cp_hqd_pq_wptr_poll_addr_hi);
3037 /* enable the doorbell if requested */
3038 if (ring->use_doorbell) {
3039 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3040 (adev->doorbell_index.kiq * 2) << 2);
3041 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3042 (adev->doorbell_index.userqueue_end * 2) << 2);
3045 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3046 mqd->cp_hqd_pq_doorbell_control);
3048 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3049 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3050 mqd->cp_hqd_pq_wptr_lo);
3051 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3052 mqd->cp_hqd_pq_wptr_hi);
3054 /* set the vmid for the queue */
3055 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3057 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3058 mqd->cp_hqd_persistent_state);
3060 /* activate the queue */
3061 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3062 mqd->cp_hqd_active);
3064 if (ring->use_doorbell)
3065 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3070 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3072 struct amdgpu_device *adev = ring->adev;
3075 /* disable the queue if it's active */
3076 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3078 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3080 for (j = 0; j < adev->usec_timeout; j++) {
3081 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3086 if (j == adev->usec_timeout) {
3087 DRM_DEBUG("KIQ dequeue request failed.\n");
3089 /* Manual disable if dequeue request times out */
3090 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3093 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3097 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3098 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3099 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3100 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3101 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3102 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3103 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3104 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
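/*
 * The KIQ is the only queue the driver programs through the CP_HQD_*
 * registers directly (under srbm_mutex and GRBM select); regular KCQs
 * only get their MQD initialized here and are activated later through
 * KIQ MAP_QUEUES packets.  On GPU reset the saved MQD backup is
 * restored instead of being rebuilt from scratch.
 */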
3109 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3111 struct amdgpu_device *adev = ring->adev;
3112 struct v9_mqd *mqd = ring->mqd_ptr;
3113 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3115 gfx_v9_0_kiq_setting(ring);
3117 if (adev->in_gpu_reset) { /* for GPU_RESET case */
3118 /* reset MQD to a clean status */
3119 if (adev->gfx.mec.mqd_backup[mqd_idx])
3120 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3122 /* reset ring buffer */
3123 ring->wptr = 0;
3124 amdgpu_ring_clear_ring(ring);
3126 mutex_lock(&adev->srbm_mutex);
3127 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3128 gfx_v9_0_kiq_init_register(ring);
3129 soc15_grbm_select(adev, 0, 0, 0, 0);
3130 mutex_unlock(&adev->srbm_mutex);
3132 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3133 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3134 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3135 mutex_lock(&adev->srbm_mutex);
3136 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3137 gfx_v9_0_mqd_init(ring);
3138 gfx_v9_0_kiq_init_register(ring);
3139 soc15_grbm_select(adev, 0, 0, 0, 0);
3140 mutex_unlock(&adev->srbm_mutex);
3142 if (adev->gfx.mec.mqd_backup[mqd_idx])
3143 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3149 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3151 struct amdgpu_device *adev = ring->adev;
3152 struct v9_mqd *mqd = ring->mqd_ptr;
3153 int mqd_idx = ring - &adev->gfx.compute_ring[0];
3155 if (!adev->in_gpu_reset && !adev->in_suspend) {
3156 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3157 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3158 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3159 mutex_lock(&adev->srbm_mutex);
3160 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3161 gfx_v9_0_mqd_init(ring);
3162 soc15_grbm_select(adev, 0, 0, 0, 0);
3163 mutex_unlock(&adev->srbm_mutex);
3165 if (adev->gfx.mec.mqd_backup[mqd_idx])
3166 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3167 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3168 /* reset MQD to a clean status */
3169 if (adev->gfx.mec.mqd_backup[mqd_idx])
3170 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3172 /* reset ring buffer */
3173 ring->wptr = 0;
3174 amdgpu_ring_clear_ring(ring);
3175 } else {
3176 amdgpu_ring_clear_ring(ring);
3182 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3184 struct amdgpu_ring *ring;
3187 ring = &adev->gfx.kiq.ring;
3189 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3190 if (unlikely(r != 0))
3193 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3194 if (unlikely(r != 0))
3197 gfx_v9_0_kiq_init_queue(ring);
3198 amdgpu_bo_kunmap(ring->mqd_obj);
3199 ring->mqd_ptr = NULL;
3200 amdgpu_bo_unreserve(ring->mqd_obj);
3201 ring->sched.ready = true;
3205 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3207 struct amdgpu_ring *ring = NULL;
3210 gfx_v9_0_cp_compute_enable(adev, true);
3212 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3213 ring = &adev->gfx.compute_ring[i];
3215 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3216 if (unlikely(r != 0))
3218 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3220 r = gfx_v9_0_kcq_init_queue(ring);
3221 amdgpu_bo_kunmap(ring->mqd_obj);
3222 ring->mqd_ptr = NULL;
3224 amdgpu_bo_unreserve(ring->mqd_obj);
3229 r = gfx_v9_0_kiq_kcq_enable(adev);
3234 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3237 struct amdgpu_ring *ring;
3239 if (!(adev->flags & AMD_IS_APU))
3240 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3242 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3243 /* legacy firmware loading */
3244 r = gfx_v9_0_cp_gfx_load_microcode(adev);
3248 r = gfx_v9_0_cp_compute_load_microcode(adev);
3253 r = gfx_v9_0_kiq_resume(adev);
3257 r = gfx_v9_0_cp_gfx_resume(adev);
3261 r = gfx_v9_0_kcq_resume(adev);
3265 ring = &adev->gfx.gfx_ring[0];
3266 r = amdgpu_ring_test_helper(ring);
3270 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3271 ring = &adev->gfx.compute_ring[i];
3272 amdgpu_ring_test_helper(ring);
3275 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3280 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3282 gfx_v9_0_cp_gfx_enable(adev, enable);
3283 gfx_v9_0_cp_compute_enable(adev, enable);
3286 static int gfx_v9_0_hw_init(void *handle)
3289 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3291 gfx_v9_0_init_golden_registers(adev);
3293 gfx_v9_0_constants_init(adev);
3295 r = gfx_v9_0_csb_vram_pin(adev);
3299 r = adev->gfx.rlc.funcs->resume(adev);
3303 r = gfx_v9_0_cp_resume(adev);
3307 r = gfx_v9_0_ngg_en(adev);
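/*
 * Tear-down counterpart of gfx_v9_0_kiq_kcq_enable(): one 6-dword
 * UNMAP_QUEUES packet per compute ring, using the RESET_QUEUES action
 * so the CP drops the queues without draining them.
 */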
3314 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3317 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3319 r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3321 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3323 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3324 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3326 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3327 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3328 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3329 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3330 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3331 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3332 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3333 amdgpu_ring_write(kiq_ring, 0);
3334 amdgpu_ring_write(kiq_ring, 0);
3335 amdgpu_ring_write(kiq_ring, 0);
3337 r = amdgpu_ring_test_helper(kiq_ring);
3339 DRM_ERROR("KCQ disable failed\n");
3344 static int gfx_v9_0_hw_fini(void *handle)
3346 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3348 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3349 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3350 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3352 /* disable KCQ so the CPC does not touch memory that is no longer valid */
3353 gfx_v9_0_kcq_disable(adev);
3355 if (amdgpu_sriov_vf(adev)) {
3356 gfx_v9_0_cp_gfx_enable(adev, false);
3357 /* must disable polling for SRIOV when hw is finished, otherwise the
3358 * CPC engine may keep fetching a WB address that is already invalid
3359 * after sw teardown and trigger DMAR read errors on the hypervisor side */
3362 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3366 /* Use the deinitialize sequence from CAIL when unbinding the device
3367 * from the driver, otherwise the KIQ hangs when binding it back
3369 if (!adev->in_gpu_reset && !adev->in_suspend) {
3370 mutex_lock(&adev->srbm_mutex);
3371 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3372 adev->gfx.kiq.ring.pipe,
3373 adev->gfx.kiq.ring.queue, 0);
3374 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3375 soc15_grbm_select(adev, 0, 0, 0, 0);
3376 mutex_unlock(&adev->srbm_mutex);
3379 gfx_v9_0_cp_enable(adev, false);
3380 adev->gfx.rlc.funcs->stop(adev);
3382 gfx_v9_0_csb_vram_unpin(adev);
3387 static int gfx_v9_0_suspend(void *handle)
3389 return gfx_v9_0_hw_fini(handle);
3392 static int gfx_v9_0_resume(void *handle)
3394 return gfx_v9_0_hw_init(handle);
3397 static bool gfx_v9_0_is_idle(void *handle)
3399 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3401 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3402 GRBM_STATUS, GUI_ACTIVE))
3408 static int gfx_v9_0_wait_for_idle(void *handle)
3411 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3413 for (i = 0; i < adev->usec_timeout; i++) {
3414 if (gfx_v9_0_is_idle(handle))
3421 static int gfx_v9_0_soft_reset(void *handle)
3423 u32 grbm_soft_reset = 0;
3425 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3428 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3429 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3430 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3431 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3432 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3433 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3434 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3435 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3436 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3437 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3438 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3441 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3442 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3443 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3447 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3448 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3449 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3450 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3453 if (grbm_soft_reset) {
3455 adev->gfx.rlc.funcs->stop(adev);
3457 /* Disable GFX parsing/prefetching */
3458 gfx_v9_0_cp_gfx_enable(adev, false);
3460 /* Disable MEC parsing/prefetching */
3461 gfx_v9_0_cp_compute_enable(adev, false);
3463 if (grbm_soft_reset) {
3464 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3465 tmp |= grbm_soft_reset;
3466 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3467 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3468 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3472 tmp &= ~grbm_soft_reset;
3473 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3474 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3477 /* Wait a little for things to settle down */
3483 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3487 mutex_lock(&adev->gfx.gpu_clock_mutex);
3488 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
3489 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
3490 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3491 mutex_unlock(&adev->gfx.gpu_clock_mutex);
3495 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3497 uint32_t gds_base, uint32_t gds_size,
3498 uint32_t gws_base, uint32_t gws_size,
3499 uint32_t oa_base, uint32_t oa_size)
3501 struct amdgpu_device *adev = ring->adev;
3504 gfx_v9_0_write_data_to_reg(ring, 0, false,
3505 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
3509 gfx_v9_0_write_data_to_reg(ring, 0, false,
3510 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
3514 gfx_v9_0_write_data_to_reg(ring, 0, false,
3515 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
3516 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
3519 gfx_v9_0_write_data_to_reg(ring, 0, false,
3520 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
3521 (1 << (oa_size + oa_base)) - (1 << oa_base));
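/*
 * Raw GFX9 shader binaries (encoded as dwords) used by the EDC
 * workarounds below; they walk every VGPR/SGPR so that the GPR banks
 * carry valid ECC state before the RAS counters are consulted.
 */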
3524 static const u32 vgpr_init_compute_shader[] =
3526 0xb07c0000, 0xbe8000ff,
3527 0x000000f8, 0xbf110800,
3528 0x7e000280, 0x7e020280,
3529 0x7e040280, 0x7e060280,
3530 0x7e080280, 0x7e0a0280,
3531 0x7e0c0280, 0x7e0e0280,
3532 0x80808800, 0xbe803200,
3533 0xbf84fff5, 0xbf9c0000,
3534 0xd28c0001, 0x0001007f,
3535 0xd28d0001, 0x0002027e,
3536 0x10020288, 0xb8810904,
3537 0xb7814000, 0xd1196a01,
3538 0x00000301, 0xbe800087,
3539 0xbefc00c1, 0xd89c4000,
3540 0x00020201, 0xd89cc080,
3541 0x00040401, 0x320202ff,
3542 0x00000800, 0x80808100,
3543 0xbf84fff8, 0x7e020280,
3544 0xbf810000, 0x00000000,
3547 static const u32 sgpr_init_compute_shader[] =
3549 0xb07c0000, 0xbe8000ff,
3550 0x0000005f, 0xbee50080,
3551 0xbe812c65, 0xbe822c65,
3552 0xbe832c65, 0xbe842c65,
3553 0xbe852c65, 0xb77c0005,
3554 0x80808500, 0xbf84fff8,
3555 0xbe800080, 0xbf810000,
3558 static const struct soc15_reg_entry vgpr_init_regs[] = {
3559 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3560 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3561 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3562 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3563 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3564 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3565 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3566 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3567 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
3568 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
3571 static const struct soc15_reg_entry sgpr_init_regs[] = {
3572 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3573 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3574 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3575 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3576 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3577 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3578 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3579 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3580 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 SGPRs) */
3581 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
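/*
 * SEC/DED (single-error-correct/double-error-detect) EDC counter
 * registers. Judging from the read-back loop in
 * gfx_v9_0_do_edc_gpr_workarounds(), the trailing fields are
 * { value, se_num, instance }: how many shader engines and how many
 * instances to select while clearing each counter.
 */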
3584 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
3585 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
3586 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
3587 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
3588 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
3589 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
3590 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
3591 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
3592 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
3593 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
3594 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
3595 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
3596 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
3597 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
3598 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
3599 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
3600 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
3601 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
3602 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
3603 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
3604 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
3605 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
3606 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
3607 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
3608 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
3609 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
3610 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
3611 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
3612 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
3613 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
3614 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
3615 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
3616 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
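/*
 * GDS EDC workaround: open a VMID0 window over the whole GDS, DMA data
 * across it with a DMA_DATA packet on the first compute ring, poll
 * until the ring drains, then close the window again.
 */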
3619 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
3621 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
3624 r = amdgpu_ring_alloc(ring, 7);
3626 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
3631 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
3632 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
3634 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3635 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
3636 PACKET3_DMA_DATA_DST_SEL(1) |
3637 PACKET3_DMA_DATA_SRC_SEL(2) |
3638 PACKET3_DMA_DATA_ENGINE(0)));
3639 amdgpu_ring_write(ring, 0);
3640 amdgpu_ring_write(ring, 0);
3641 amdgpu_ring_write(ring, 0);
3642 amdgpu_ring_write(ring, 0);
3643 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
3644 adev->gds.gds_size);
3646 amdgpu_ring_commit(ring);
3648 for (i = 0; i < adev->usec_timeout; i++) {
3649 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
3654 if (i >= adev->usec_timeout)
3657 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
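/*
 * GPR EDC workaround: build a single IB that programs the dispatch
 * state, runs the VGPR init shader and then the SGPR init shader, and
 * afterwards reads every SEC/DED counter back to clear it.
 */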
3662 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
3664 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
3665 struct amdgpu_ib ib;
3666 struct dma_fence *f = NULL;
3668 unsigned total_size, vgpr_offset, sgpr_offset;
3671 /* only supported when RAS is enabled */
3672 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
3675 /* bail if the compute ring is not ready */
3676 if (!ring->sched.ready)
3680 ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3682 ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3683 total_size = ALIGN(total_size, 256);
3684 vgpr_offset = total_size;
3685 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
3686 sgpr_offset = total_size;
3687 total_size += sizeof(sgpr_init_compute_shader);
3689 /* allocate an indirect buffer to put the commands in */
3690 memset(&ib, 0, sizeof(ib));
3691 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
3693 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
3697 /* load the compute shaders */
3698 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
3699 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
3701 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
3702 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
3704 /* init the ib length to 0 */
3708 /* write the register state for the compute dispatch */
3709 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
3710 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3711 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
3712 - PACKET3_SET_SH_REG_START;
3713 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
3715 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3716 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
3717 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3718 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3719 - PACKET3_SET_SH_REG_START;
3720 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3721 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3723 /* write dispatch packet */
3724 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3725 ib.ptr[ib.length_dw++] = 128; /* x */
3726 ib.ptr[ib.length_dw++] = 1; /* y */
3727 ib.ptr[ib.length_dw++] = 1; /* z */
3728 ib.ptr[ib.length_dw++] =
3729 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3731 /* write CS partial flush packet */
3732 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3733 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3736 /* write the register state for the compute dispatch */
3737 for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
3738 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3739 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
3740 - PACKET3_SET_SH_REG_START;
3741 ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
3743 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3744 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
3745 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3746 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3747 - PACKET3_SET_SH_REG_START;
3748 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3749 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3751 /* write dispatch packet */
3752 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3753 ib.ptr[ib.length_dw++] = 128; /* x */
3754 ib.ptr[ib.length_dw++] = 1; /* y */
3755 ib.ptr[ib.length_dw++] = 1; /* z */
3756 ib.ptr[ib.length_dw++] =
3757 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3759 /* write CS partial flush packet */
3760 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3761 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3763 /* schedule the IB on the ring */
3764 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
3766 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
3770 /* wait for the GPU to finish processing the IB */
3771 r = dma_fence_wait(f, false);
3773 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
3777 /* read back registers to clear the counters */
3778 mutex_lock(&adev->grbm_idx_mutex);
3779 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
3780 for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
3781 for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
3782 gfx_v9_0_select_se_sh(adev, j, 0x0, k);
3783 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3787 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
3788 mutex_unlock(&adev->grbm_idx_mutex);
3791 amdgpu_ib_free(adev, &ib, NULL);
3797 static int gfx_v9_0_early_init(void *handle)
3799 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3801 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
3802 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
3803 gfx_v9_0_set_ring_funcs(adev);
3804 gfx_v9_0_set_irq_funcs(adev);
3805 gfx_v9_0_set_gds_init(adev);
3806 gfx_v9_0_set_rlc_funcs(adev);
3811 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
3812 struct amdgpu_iv_entry *entry);
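/*
 * Late RAS init for GFX: run the EDC workarounds, enable the GFX RAS
 * feature (falling back to a boot-time GPU reset on failure), then wire
 * up the RAS interrupt handler, debugfs/sysfs nodes and the CP ECC IRQ.
 */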
3814 static int gfx_v9_0_ecc_late_init(void *handle)
3816 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3817 struct ras_common_if **ras_if = &adev->gfx.ras_if;
3818 struct ras_ih_if ih_info = {
3819 .cb = gfx_v9_0_process_ras_data_cb,
3821 struct ras_fs_if fs_info = {
3822 .sysfs_name = "gfx_err_count",
3823 .debugfs_name = "gfx_err_inject",
3825 struct ras_common_if ras_block = {
3826 .block = AMDGPU_RAS_BLOCK__GFX,
3827 .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
3828 .sub_block_index = 0,
3833 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
3834 amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
3838 r = gfx_v9_0_do_edc_gds_workarounds(adev);
3842 /* requires IBs, so run in late init after the IB pool is initialized */
3843 r = gfx_v9_0_do_edc_gpr_workarounds(adev);
3847 /* handle resume path. */
3849 /* Resend the RAS TA enable cmd during resume and
3850 * be prepared to handle failure.
3852 ih_info.head = **ras_if;
3853 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
3856 /* request a GPU reset; this path will run again. */
3857 amdgpu_ras_request_reset_on_boot(adev,
3858 AMDGPU_RAS_BLOCK__GFX);
3861 /* failed to enable RAS; clean everything up. */
3864 /* enabled successfully; continue. */
3868 *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
3872 **ras_if = ras_block;
3874 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
3877 amdgpu_ras_request_reset_on_boot(adev,
3878 AMDGPU_RAS_BLOCK__GFX);
3884 ih_info.head = **ras_if;
3885 fs_info.head = **ras_if;
3887 r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
3891 amdgpu_ras_debugfs_create(adev, &fs_info);
3893 r = amdgpu_ras_sysfs_create(adev, &fs_info);
3897 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
3903 amdgpu_ras_sysfs_remove(adev, *ras_if);
3905 amdgpu_ras_debugfs_remove(adev, *ras_if);
3906 amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
3908 amdgpu_ras_feature_enable(adev, *ras_if, 0);
3915 static int gfx_v9_0_late_init(void *handle)
3917 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3920 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
3924 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
3928 r = gfx_v9_0_ecc_late_init(handle);
3935 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
3937 uint32_t rlc_setting;
3939 /* if RLC is not enabled, do nothing */
3940 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
3941 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
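/*
 * Request RLC safe mode: write CMD plus a message to RLC_SAFE_MODE and
 * poll until the RLC clears the CMD field as an ack.
 */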
3947 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
3952 data = RLC_SAFE_MODE__CMD_MASK;
3953 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
3954 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3956 /* wait for RLC_SAFE_MODE */
3957 for (i = 0; i < adev->usec_timeout; i++) {
3958 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
3964 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
3968 data = RLC_SAFE_MODE__CMD_MASK;
3969 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3972 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
3975 amdgpu_gfx_rlc_enter_safe_mode(adev);
3977 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
3978 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
3979 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
3980 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
3982 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
3983 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
3986 amdgpu_gfx_rlc_exit_safe_mode(adev);
3989 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
3992 /* TODO: double check if we need to perform under safe mode */
3993 /* gfx_v9_0_enter_rlc_safe_mode(adev); */
3995 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
3996 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
3998 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4000 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4001 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4003 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4005 /* gfx_v9_0_exit_rlc_safe_mode(adev); */
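/*
 * Toggle medium-grain clock gating (MGCG) and memory light sleep
 * (MGLS) under RLC safe mode: clear the relevant RLC_CGTT_MGCG_OVERRIDE
 * bits and set the RLC/CP MEM_SLP enables when gating, and do the
 * reverse when ungating.
 */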
4008 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4013 amdgpu_gfx_rlc_enter_safe_mode(adev);
4015 /* It is disabled by HW by default */
4016 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4017 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4018 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4020 if (adev->asic_type != CHIP_VEGA12)
4021 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4023 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4024 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4025 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4027 /* only for Vega10 & Raven1 */
4028 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4031 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4033 /* MGLS is a global flag to control all MGLS in GFX */
4034 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4035 /* 2 - RLC memory Light sleep */
4036 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4037 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4038 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4040 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4042 /* 3 - CP memory Light sleep */
4043 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4044 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4045 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4047 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4051 /* 1 - MGCG_OVERRIDE */
4052 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4054 if (adev->asic_type != CHIP_VEGA12)
4055 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4057 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4058 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4059 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4060 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4063 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4065 /* 2 - disable MGLS in RLC */
4066 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4067 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4068 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4069 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4072 /* 3 - disable MGLS in CP */
4073 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4074 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4075 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4076 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4080 amdgpu_gfx_rlc_exit_safe_mode(adev);
4083 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4088 amdgpu_gfx_rlc_enter_safe_mode(adev);
4090 /* Enable 3D CGCG/CGLS */
4091 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4092 /* write cmd to clear cgcg/cgls ov */
4093 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4094 /* unset CGCG override */
4095 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4096 /* update CGCG and CGLS override bits */
4098 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4100 /* enable the 3D CGCG FSM (0x0000363f) */
4101 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4103 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4104 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4105 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4106 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4107 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4109 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4111 /* set IDLE_POLL_COUNT(0x00900100) */
4112 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4113 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4114 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4116 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4118 /* Disable CGCG/CGLS */
4119 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4120 /* disable cgcg, cgls should be disabled */
4121 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4122 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4123 /* disable cgcg and cgls in FSM */
4125 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4128 amdgpu_gfx_rlc_exit_safe_mode(adev);
4131 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4136 amdgpu_gfx_rlc_enter_safe_mode(adev);
4138 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4139 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4140 /* unset CGCG override */
4141 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4142 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4143 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4145 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4146 /* update CGCG and CGLS override bits */
4148 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4150 /* enable the CGCG FSM (0x0000363F) */
4151 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4153 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4154 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4155 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4156 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4157 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4159 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4161 /* set IDLE_POLL_COUNT(0x00900100) */
4162 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4163 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4164 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4166 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4168 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4169 /* reset CGCG/CGLS bits */
4170 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4171 /* disable cgcg and cgls in FSM */
4173 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4176 amdgpu_gfx_rlc_exit_safe_mode(adev);
4179 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4183 /* CGCG/CGLS should be enabled after MGCG/MGLS
4184 * === MGCG + MGLS ===
4186 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4187 /* === CGCG/CGLS for GFX 3D only === */
4188 gfx_v9_0_update_3d_clock_gating(adev, enable);
4189 /* === CGCG + CGLS === */
4190 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4192 /* CGCG/CGLS should be disabled before MGCG/MGLS
4193 * === CGCG + CGLS ===
4195 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4196 /* === CGCG/CGLS for GFX 3D only === */
4197 gfx_v9_0_update_3d_clock_gating(adev, enable);
4198 /* === MGCG + MGLS === */
4199 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4204 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4205 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4206 .set_safe_mode = gfx_v9_0_set_safe_mode,
4207 .unset_safe_mode = gfx_v9_0_unset_safe_mode,
4208 .init = gfx_v9_0_rlc_init,
4209 .get_csb_size = gfx_v9_0_get_csb_size,
4210 .get_csb_buffer = gfx_v9_0_get_csb_buffer,
4211 .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4212 .resume = gfx_v9_0_rlc_resume,
4213 .stop = gfx_v9_0_rlc_stop,
4214 .reset = gfx_v9_0_rlc_reset,
4215 .start = gfx_v9_0_rlc_start
4218 static int gfx_v9_0_set_powergating_state(void *handle,
4219 enum amd_powergating_state state)
4221 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4222 bool enable = (state == AMD_PG_STATE_GATE);
4224 switch (adev->asic_type) {
4227 amdgpu_gfx_off_ctrl(adev, false);
4228 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4230 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4231 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4232 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4234 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4235 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4238 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4239 gfx_v9_0_enable_cp_power_gating(adev, true);
4241 gfx_v9_0_enable_cp_power_gating(adev, false);
4243 /* update gfx cgpg state */
4244 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4246 /* update mgcg state */
4247 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4250 amdgpu_gfx_off_ctrl(adev, true);
4254 amdgpu_gfx_off_ctrl(adev, false);
4255 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4257 amdgpu_gfx_off_ctrl(adev, true);
4267 static int gfx_v9_0_set_clockgating_state(void *handle,
4268 enum amd_clockgating_state state)
4270 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4272 if (amdgpu_sriov_vf(adev))
4275 switch (adev->asic_type) {
4280 gfx_v9_0_update_gfx_clock_gating(adev,
4281 state == AMD_CG_STATE_GATE);
4289 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4291 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4294 if (amdgpu_sriov_vf(adev))
4297 /* AMD_CG_SUPPORT_GFX_MGCG */
4298 data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4299 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4300 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
4302 /* AMD_CG_SUPPORT_GFX_CGCG */
4303 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4304 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4305 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
4307 /* AMD_CG_SUPPORT_GFX_CGLS */
4308 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4309 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
4311 /* AMD_CG_SUPPORT_GFX_RLC_LS */
4312 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4313 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4314 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4316 /* AMD_CG_SUPPORT_GFX_CP_LS */
4317 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4318 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4319 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4321 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
4322 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4323 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4324 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4326 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
4327 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4328 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4331 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4333 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 uses a 32-bit rptr */
4336 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4338 struct amdgpu_device *adev = ring->adev;
4341 /* XXX check if swapping is necessary on BE */
4342 if (ring->use_doorbell) {
4343 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4345 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4346 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4352 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4354 struct amdgpu_device *adev = ring->adev;
4356 if (ring->use_doorbell) {
4357 /* XXX check if swapping is necessary on BE */
4358 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4359 WDOORBELL64(ring->doorbell_index, ring->wptr);
4361 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4362 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
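/*
 * Emit an HDP flush: pick the NBIO ref/mask bit for this ring's CP
 * engine and pipe, then wait on the HDP flush request/done register
 * pair until the flush completes.
 */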
4366 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4368 struct amdgpu_device *adev = ring->adev;
4369 u32 ref_and_mask, reg_mem_engine;
4370 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
4372 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4375 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4378 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4385 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4386 reg_mem_engine = 1; /* pfp */
4389 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4390 adev->nbio_funcs->get_hdp_flush_req_offset(adev),
4391 adev->nbio_funcs->get_hdp_flush_done_offset(adev),
4392 ref_and_mask, ref_and_mask, 0x20);
4395 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4396 struct amdgpu_job *job,
4397 struct amdgpu_ib *ib,
4400 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4401 u32 header, control = 0;
4403 if (ib->flags & AMDGPU_IB_FLAG_CE)
4404 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4406 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4408 control |= ib->length_dw | (vmid << 24);
4410 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4411 control |= INDIRECT_BUFFER_PRE_ENB(1);
4413 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4414 gfx_v9_0_ring_emit_de_meta(ring);
4417 amdgpu_ring_write(ring, header);
4418 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4419 amdgpu_ring_write(ring,
4423 lower_32_bits(ib->gpu_addr));
4424 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4425 amdgpu_ring_write(ring, control);
4428 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4429 struct amdgpu_job *job,
4430 struct amdgpu_ib *ib,
4433 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4434 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4436 /* Currently there is a high probability of a wave ID mismatch
4437 * between ME and GDS, leading to a HW deadlock, because ME generates
4438 * different wave IDs than the GDS expects. This situation happens
4439 * randomly when at least 5 compute pipes use GDS ordered append.
4440 * The wave IDs generated by ME are also wrong after suspend/resume.
4441 * Those are probably bugs somewhere else in the kernel driver.
4443 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4444 * GDS to 0 for this ring (me/pipe).
4446 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4447 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4448 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4449 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4452 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4453 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4454 amdgpu_ring_write(ring,
4458 lower_32_bits(ib->gpu_addr));
4459 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4460 amdgpu_ring_write(ring, control);
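/*
 * Emit a fence via RELEASE_MEM: flush/invalidate the caches, write the
 * 32- or 64-bit seq value to "addr" and optionally raise an interrupt,
 * depending on the AMDGPU_FENCE_FLAG_* bits.
 */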
4463 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4464 u64 seq, unsigned flags)
4466 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4467 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4468 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
4470 /* RELEASE_MEM - flush caches, send int */
4471 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
4472 amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
4473 EOP_TC_NC_ACTION_EN) :
4474 (EOP_TCL1_ACTION_EN |
4476 EOP_TC_WB_ACTION_EN |
4477 EOP_TC_MD_ACTION_EN)) |
4478 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4480 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4483 * the address should be Qword-aligned for a 64-bit write and
4484 * Dword-aligned when only the low 32 bits are sent (data high discarded)
4490 amdgpu_ring_write(ring, lower_32_bits(addr));
4491 amdgpu_ring_write(ring, upper_32_bits(addr));
4492 amdgpu_ring_write(ring, lower_32_bits(seq));
4493 amdgpu_ring_write(ring, upper_32_bits(seq));
4494 amdgpu_ring_write(ring, 0);
4497 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4499 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4500 uint32_t seq = ring->fence_drv.sync_seq;
4501 uint64_t addr = ring->fence_drv.gpu_addr;
4503 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
4504 lower_32_bits(addr), upper_32_bits(addr),
4505 seq, 0xffffffff, 4);
4508 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4509 unsigned vmid, uint64_t pd_addr)
4511 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
4513 /* compute doesn't have PFP */
4514 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
4515 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4516 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4517 amdgpu_ring_write(ring, 0x0);
4521 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4523 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware uses a 32-bit rptr */
4526 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4530 /* XXX check if swapping is necessary on BE */
4531 if (ring->use_doorbell)
4532 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
4538 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
4541 struct amdgpu_device *adev = ring->adev;
4542 int pipe_num, tmp, reg;
4543 int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
4545 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
4547 /* first me only has 2 entries, GFX and HP3D */
4551 reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
4553 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
4557 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
4558 struct amdgpu_ring *ring,
4563 struct amdgpu_ring *iring;
4565 mutex_lock(&adev->gfx.pipe_reserve_mutex);
4566 pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
4568 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4570 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4572 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
4573 /* Clear all reservations - everyone reacquires all resources */
4574 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
4575 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
4578 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
4579 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
4582 /* Lower all pipes without a current reservation */
4583 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
4584 iring = &adev->gfx.gfx_ring[i];
4585 pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4589 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4590 gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4593 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
4594 iring = &adev->gfx.compute_ring[i];
4595 pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4599 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4600 gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4604 mutex_unlock(&adev->gfx.pipe_reserve_mutex);
4607 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
4608 struct amdgpu_ring *ring,
4611 uint32_t pipe_priority = acquire ? 0x2 : 0x0;
4612 uint32_t queue_priority = acquire ? 0xf : 0x0;
4614 mutex_lock(&adev->srbm_mutex);
4615 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4617 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
4618 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
4620 soc15_grbm_select(adev, 0, 0, 0, 0);
4621 mutex_unlock(&adev->srbm_mutex);
4624 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
4625 enum drm_sched_priority priority)
4627 struct amdgpu_device *adev = ring->adev;
4628 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
4630 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
4633 gfx_v9_0_hqd_set_priority(adev, ring, acquire);
4634 gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
4637 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4639 struct amdgpu_device *adev = ring->adev;
4641 /* XXX check if swapping is necessary on BE */
4642 if (ring->use_doorbell) {
4643 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4644 WDOORBELL64(ring->doorbell_index, ring->wptr);
4646 BUG(); /* only DOORBELL method supported on gfx9 now */
4650 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
4651 u64 seq, unsigned int flags)
4653 struct amdgpu_device *adev = ring->adev;
4655 /* we only allocate 32 bits for each seq writeback address */
4656 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
4658 /* write fence seq to the "addr" */
4659 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4660 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4661 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
4662 amdgpu_ring_write(ring, lower_32_bits(addr));
4663 amdgpu_ring_write(ring, upper_32_bits(addr));
4664 amdgpu_ring_write(ring, lower_32_bits(seq));
4666 if (flags & AMDGPU_FENCE_FLAG_INT) {
4667 /* set register to trigger INT */
4668 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4669 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4670 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
4671 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
4672 amdgpu_ring_write(ring, 0);
4673 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
4677 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
4679 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4680 amdgpu_ring_write(ring, 0);
4683 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
4685 struct v9_ce_ib_state ce_payload = {0};
4689 cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
4690 csa_addr = amdgpu_csa_vaddr(ring->adev);
4692 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4693 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
4694 WRITE_DATA_DST_SEL(8) |
4696 WRITE_DATA_CACHE_POLICY(0));
4697 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4698 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4699 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
4702 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
4704 struct v9_de_ib_state de_payload = {0};
4705 uint64_t csa_addr, gds_addr;
4708 csa_addr = amdgpu_csa_vaddr(ring->adev);
4709 gds_addr = csa_addr + 4096;
4710 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
4711 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
4713 cnt = (sizeof(de_payload) >> 2) + 4 - 2;
4714 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4715 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
4716 WRITE_DATA_DST_SEL(8) |
4718 WRITE_DATA_CACHE_POLICY(0));
4719 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4720 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4721 amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
4724 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
4726 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
4727 amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
4730 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
4734 if (amdgpu_sriov_vf(ring->adev))
4735 gfx_v9_0_ring_emit_ce_meta(ring);
4737 gfx_v9_0_ring_emit_tmz(ring, true);
4739 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
4740 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
4741 /* set load_global_config & load_global_uconfig */
4743 /* set load_cs_sh_regs */
4745 /* set load_per_context_state & load_gfx_sh_regs for GFX */
4748 /* set load_ce_ram if preamble presented */
4749 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
4752 /* still load_ce_ram if this is the first time the preamble is
4753 * presented, even though no context switch happens.
4755 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
4759 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4760 amdgpu_ring_write(ring, dw2);
4761 amdgpu_ring_write(ring, 0);
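/*
 * Emit a COND_EXEC with a dummy DW count and return the ring offset of
 * that count; gfx_v9_0_ring_emit_patch_cond_exec() later overwrites it
 * with the real number of dwords to conditionally skip.
 */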
4764 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
4767 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
4768 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
4769 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
4770 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
4771 ret = ring->wptr & ring->buf_mask;
4772 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
4776 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
4779 BUG_ON(offset > ring->buf_mask);
4780 BUG_ON(ring->ring[offset] != 0x55aa55aa);
4782 cur = (ring->wptr & ring->buf_mask) - 1;
4783 if (likely(cur > offset))
4784 ring->ring[offset] = cur - offset;
4786 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
4789 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
4791 struct amdgpu_device *adev = ring->adev;
4793 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4794 amdgpu_ring_write(ring, 0 | /* src: register*/
4795 (5 << 8) | /* dst: memory */
4796 (1 << 20)); /* write confirm */
4797 amdgpu_ring_write(ring, reg);
4798 amdgpu_ring_write(ring, 0);
4799 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4800 adev->virt.reg_val_offs * 4));
4801 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4802 adev->virt.reg_val_offs * 4));
4805 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
4810 switch (ring->funcs->type) {
4811 case AMDGPU_RING_TYPE_GFX:
4812 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
4814 case AMDGPU_RING_TYPE_KIQ:
4815 cmd = (1 << 16); /* no inc addr */
4821 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4822 amdgpu_ring_write(ring, cmd);
4823 amdgpu_ring_write(ring, reg);
4824 amdgpu_ring_write(ring, 0);
4825 amdgpu_ring_write(ring, val);
4828 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
4829 uint32_t val, uint32_t mask)
4831 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
4834 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
4835 uint32_t reg0, uint32_t reg1,
4836 uint32_t ref, uint32_t mask)
4838 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4839 struct amdgpu_device *adev = ring->adev;
4840 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
4841 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
4844 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
4847 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
4851 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
4853 struct amdgpu_device *adev = ring->adev;
4856 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
4857 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
4858 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
4859 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
4860 WREG32(mmSQ_CMD, value);
4863 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4864 enum amdgpu_interrupt_state state)
4867 case AMDGPU_IRQ_STATE_DISABLE:
4868 case AMDGPU_IRQ_STATE_ENABLE:
4869 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4870 TIME_STAMP_INT_ENABLE,
4871 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4878 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4880 enum amdgpu_interrupt_state state)
4882 u32 mec_int_cntl, mec_int_cntl_reg;
4885 * amdgpu controls only the first MEC. That's why this function only
4886 * handles the setting of interrupts for this specific MEC. All other
4887 * pipes' interrupts are set by amdkfd.
4893 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
4896 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
4899 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
4902 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
4905 DRM_DEBUG("invalid pipe %d\n", pipe);
4909 DRM_DEBUG("invalid me %d\n", me);
4914 case AMDGPU_IRQ_STATE_DISABLE:
4915 mec_int_cntl = RREG32(mec_int_cntl_reg);
4916 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4917 TIME_STAMP_INT_ENABLE, 0);
4918 WREG32(mec_int_cntl_reg, mec_int_cntl);
4920 case AMDGPU_IRQ_STATE_ENABLE:
4921 mec_int_cntl = RREG32(mec_int_cntl_reg);
4922 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4923 TIME_STAMP_INT_ENABLE, 1);
4924 WREG32(mec_int_cntl_reg, mec_int_cntl);
4931 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4932 struct amdgpu_irq_src *source,
4934 enum amdgpu_interrupt_state state)
4937 case AMDGPU_IRQ_STATE_DISABLE:
4938 case AMDGPU_IRQ_STATE_ENABLE:
4939 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4940 PRIV_REG_INT_ENABLE,
4941 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4950 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4951 struct amdgpu_irq_src *source,
4953 enum amdgpu_interrupt_state state)
4956 case AMDGPU_IRQ_STATE_DISABLE:
4957 case AMDGPU_IRQ_STATE_ENABLE:
4958 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4959 PRIV_INSTR_INT_ENABLE,
4960 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4968 #define ENABLE_ECC_ON_ME_PIPE(me, pipe) \
4969 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
4970 CP_ECC_ERROR_INT_ENABLE, 1)
4972 #define DISABLE_ECC_ON_ME_PIPE(me, pipe) \
4973 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
4974 CP_ECC_ERROR_INT_ENABLE, 0)
4976 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
4977 struct amdgpu_irq_src *source,
4979 enum amdgpu_interrupt_state state)
4982 case AMDGPU_IRQ_STATE_DISABLE:
4983 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4984 CP_ECC_ERROR_INT_ENABLE, 0);
4985 DISABLE_ECC_ON_ME_PIPE(1, 0);
4986 DISABLE_ECC_ON_ME_PIPE(1, 1);
4987 DISABLE_ECC_ON_ME_PIPE(1, 2);
4988 DISABLE_ECC_ON_ME_PIPE(1, 3);
4991 case AMDGPU_IRQ_STATE_ENABLE:
4992 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4993 CP_ECC_ERROR_INT_ENABLE, 1);
4994 ENABLE_ECC_ON_ME_PIPE(1, 0);
4995 ENABLE_ECC_ON_ME_PIPE(1, 1);
4996 ENABLE_ECC_ON_ME_PIPE(1, 2);
4997 ENABLE_ECC_ON_ME_PIPE(1, 3);
5007 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5008 struct amdgpu_irq_src *src,
5010 enum amdgpu_interrupt_state state)
5013 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5014 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5016 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5017 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5019 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5020 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5022 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5023 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5025 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5026 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5028 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5029 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5031 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5032 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5034 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5035 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5037 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5038 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
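/*
 * EOP interrupt handler. The IV ring_id encodes the source:
 * bits [3:2] = ME, bits [1:0] = pipe, bits [6:4] = queue (see the masks
 * below); fences on the matching ring are then processed.
 */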
5046 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5047 struct amdgpu_irq_src *source,
5048 struct amdgpu_iv_entry *entry)
5051 u8 me_id, pipe_id, queue_id;
5052 struct amdgpu_ring *ring;
5054 DRM_DEBUG("IH: CP EOP\n");
5055 me_id = (entry->ring_id & 0x0c) >> 2;
5056 pipe_id = (entry->ring_id & 0x03) >> 0;
5057 queue_id = (entry->ring_id & 0x70) >> 4;
5061 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5065 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5066 ring = &adev->gfx.compute_ring[i];
5067 /* Per-queue interrupt is supported for MEC starting from VI.
5068 * The interrupt can only be enabled/disabled per pipe instead of per queue.
5070 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5071 amdgpu_fence_process(ring);
5078 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5079 struct amdgpu_iv_entry *entry)
5081 u8 me_id, pipe_id, queue_id;
5082 struct amdgpu_ring *ring;
5085 me_id = (entry->ring_id & 0x0c) >> 2;
5086 pipe_id = (entry->ring_id & 0x03) >> 0;
5087 queue_id = (entry->ring_id & 0x70) >> 4;
5091 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5095 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5096 ring = &adev->gfx.compute_ring[i];
5097 if (ring->me == me_id && ring->pipe == pipe_id &&
5098 ring->queue == queue_id)
5099 drm_sched_fault(&ring->sched);
5105 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5106 struct amdgpu_irq_src *source,
5107 struct amdgpu_iv_entry *entry)
5109 DRM_ERROR("Illegal register access in command stream\n");
5110 gfx_v9_0_fault(adev, entry);
5114 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5115 struct amdgpu_irq_src *source,
5116 struct amdgpu_iv_entry *entry)
5118 DRM_ERROR("Illegal instruction in command stream\n");
5119 gfx_v9_0_fault(adev, entry);
5123 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
5124 struct amdgpu_iv_entry *entry)
5126 /* TODO: a UE will trigger an interrupt. */
5127 kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
5128 amdgpu_ras_reset_gpu(adev, 0);
5129 return AMDGPU_RAS_UE;
5132 static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
5133 struct amdgpu_irq_src *source,
5134 struct amdgpu_iv_entry *entry)
5136 struct ras_common_if *ras_if = adev->gfx.ras_if;
5137 struct ras_dispatch_if ih_data = {
5144 ih_data.head = *ras_if;
5146 DRM_ERROR("CP ECC ERROR IRQ\n");
5147 amdgpu_ras_interrupt_dispatch(adev, &ih_data);
5151 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
5153 .early_init = gfx_v9_0_early_init,
5154 .late_init = gfx_v9_0_late_init,
5155 .sw_init = gfx_v9_0_sw_init,
5156 .sw_fini = gfx_v9_0_sw_fini,
5157 .hw_init = gfx_v9_0_hw_init,
5158 .hw_fini = gfx_v9_0_hw_fini,
5159 .suspend = gfx_v9_0_suspend,
5160 .resume = gfx_v9_0_resume,
5161 .is_idle = gfx_v9_0_is_idle,
5162 .wait_for_idle = gfx_v9_0_wait_for_idle,
5163 .soft_reset = gfx_v9_0_soft_reset,
5164 .set_clockgating_state = gfx_v9_0_set_clockgating_state,
5165 .set_powergating_state = gfx_v9_0_set_powergating_state,
5166 .get_clockgating_state = gfx_v9_0_get_clockgating_state,
5169 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
5170 .type = AMDGPU_RING_TYPE_GFX,
5172 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5173 .support_64bit_ptrs = true,
5174 .vmhub = AMDGPU_GFXHUB,
5175 .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
5176 .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
5177 .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
5178 .emit_frame_size = /* 242 dwords maximum in total, with 16 IBs */
5180 7 + /* PIPELINE_SYNC */
5181 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5182 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5184 8 + /* FENCE for VM_FLUSH */
5185 20 + /* GDS switch */
5186 4 + /* double SWITCH_BUFFER,
5187 the first COND_EXEC jumps to the place just
5188 prior to this double SWITCH_BUFFER */
5196 8 + 8 + /* FENCE x2 */
5197 2, /* SWITCH_BUFFER */
5198 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
5199 .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
5200 .emit_fence = gfx_v9_0_ring_emit_fence,
5201 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
5202 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
5203 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
5204 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
5205 .test_ring = gfx_v9_0_ring_test_ring,
5206 .test_ib = gfx_v9_0_ring_test_ib,
5207 .insert_nop = amdgpu_ring_insert_nop,
5208 .pad_ib = amdgpu_ring_generic_pad_ib,
5209 .emit_switch_buffer = gfx_v9_ring_emit_sb,
5210 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
5211 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
5212 .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
5213 .emit_tmz = gfx_v9_0_ring_emit_tmz,
5214 .emit_wreg = gfx_v9_0_ring_emit_wreg,
5215 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5216 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5217 .soft_recovery = gfx_v9_0_ring_soft_recovery,
5220 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
5221 .type = AMDGPU_RING_TYPE_COMPUTE,
5223 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5224 .support_64bit_ptrs = true,
5225 .vmhub = AMDGPU_GFXHUB,
5226 .get_rptr = gfx_v9_0_ring_get_rptr_compute,
5227 .get_wptr = gfx_v9_0_ring_get_wptr_compute,
5228 .set_wptr = gfx_v9_0_ring_set_wptr_compute,
5230 20 + /* gfx_v9_0_ring_emit_gds_switch */
5231 7 + /* gfx_v9_0_ring_emit_hdp_flush */
5232 5 + /* hdp invalidate */
5233 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
5234 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5235 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5236 2 + /* gfx_v9_0_ring_emit_vm_flush */
5237 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
5238 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
5239 .emit_ib = gfx_v9_0_ring_emit_ib_compute,
5240 .emit_fence = gfx_v9_0_ring_emit_fence,
5241 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
5242 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
5243 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
5244 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
5245 .test_ring = gfx_v9_0_ring_test_ring,
5246 .test_ib = gfx_v9_0_ring_test_ib,
5247 .insert_nop = amdgpu_ring_insert_nop,
5248 .pad_ib = amdgpu_ring_generic_pad_ib,
5249 .set_priority = gfx_v9_0_ring_set_priority_compute,
5250 .emit_wreg = gfx_v9_0_ring_emit_wreg,
5251 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5252 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5255 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
5256 .type = AMDGPU_RING_TYPE_KIQ,
5258 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5259 .support_64bit_ptrs = true,
5260 .vmhub = AMDGPU_GFXHUB,
5261 .get_rptr = gfx_v9_0_ring_get_rptr_compute,
5262 .get_wptr = gfx_v9_0_ring_get_wptr_compute,
5263 .set_wptr = gfx_v9_0_ring_set_wptr_compute,
5265 20 + /* gfx_v9_0_ring_emit_gds_switch */
5266 7 + /* gfx_v9_0_ring_emit_hdp_flush */
5267 5 + /* hdp invalidate */
5268 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
5269 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5270 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5271 2 + /* gfx_v9_0_ring_emit_vm_flush */
5272 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
5273 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
5274 .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
5275 .test_ring = gfx_v9_0_ring_test_ring,
5276 .insert_nop = amdgpu_ring_insert_nop,
5277 .pad_ib = amdgpu_ring_generic_pad_ib,
5278 .emit_rreg = gfx_v9_0_ring_emit_rreg,
5279 .emit_wreg = gfx_v9_0_ring_emit_wreg,
5280 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5281 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5284 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
5288 adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
5290 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5291 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
5293 for (i = 0; i < adev->gfx.num_compute_rings; i++)
5294 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
5297 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
5298 .set = gfx_v9_0_set_eop_interrupt_state,
5299 .process = gfx_v9_0_eop_irq,
5302 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
5303 .set = gfx_v9_0_set_priv_reg_fault_state,
5304 .process = gfx_v9_0_priv_reg_irq,
5307 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
5308 .set = gfx_v9_0_set_priv_inst_fault_state,
5309 .process = gfx_v9_0_priv_inst_irq,
5312 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
5313 .set = gfx_v9_0_set_cp_ecc_error_state,
5314 .process = gfx_v9_0_cp_ecc_error_irq,
5318 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
5320 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5321 adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
5323 adev->gfx.priv_reg_irq.num_types = 1;
5324 adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
5326 adev->gfx.priv_inst_irq.num_types = 1;
5327 adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
5329 adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
5330 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
5333 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
5335 switch (adev->asic_type) {
5340 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
5347 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
5349 /* init ASIC GDS info */
5350 switch (adev->asic_type) {
5354 adev->gds.gds_size = 0x10000;
5357 adev->gds.gds_size = 0x1000;
5360 adev->gds.gds_size = 0x10000;
5364 switch (adev->asic_type) {
5367 adev->gds.gds_compute_max_wave_id = 0x7ff;
5370 adev->gds.gds_compute_max_wave_id = 0x27f;
5373 if (adev->rev_id >= 0x8)
5374 adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
5376 adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
5379 /* this really depends on the chip */
5380 adev->gds.gds_compute_max_wave_id = 0x7ff;
5384 adev->gds.gws_size = 64;
5385 adev->gds.oa_size = 16;
5388 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
5396 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5397 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5399 WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
5402 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
5406 data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
5407 data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
5409 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5410 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5412 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
5414 return (~data) & mask;
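/*
 * Walk every SE/SH pair, apply the user CU disable masks, and collect
 * the active-CU bitmaps; active CUs below the per-SH maximum also feed
 * the always-on (AO) CU mask reported in cu_info.
 */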
5417 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
5418 struct amdgpu_cu_info *cu_info)
5420 int i, j, k, counter, active_cu_number = 0;
5421 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5422 unsigned disable_masks[4 * 2];
5424 if (!adev || !cu_info)
5427 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
5429 mutex_lock(&adev->grbm_idx_mutex);
5430 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5431 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5435 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
5437 gfx_v9_0_set_user_cu_inactive_bitmap(
5438 adev, disable_masks[i * 2 + j]);
5439 bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
5440 cu_info->bitmap[i][j] = bitmap;
5442 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
5443 if (bitmap & mask) {
5444 if (counter < adev->gfx.config.max_cu_per_sh)
5450 active_cu_number += counter;
5452 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5453 cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
5456 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5457 mutex_unlock(&adev->grbm_idx_mutex);
5459 cu_info->number = active_cu_number;
5460 cu_info->ao_cu_mask = ao_cu_mask;
5461 cu_info->simd_per_cu = NUM_SIMD_PER_CU;
5466 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
5468 .type = AMD_IP_BLOCK_TYPE_GFX,
5472 .funcs = &gfx_v9_0_ip_funcs,