/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/kernel.h>
#include <linux/firmware.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmPWR_MISC_CNTL_STATUS					0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L
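
/*
 * Firmware images consumed by this IP block; MODULE_FIRMWARE() records the
 * file names so that tooling can pull them in from /lib/firmware.
 */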
MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
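
/*
 * "Golden" register settings: (reg, mask, value) tuples applied over the
 * power-on defaults at init time. gfx_v9_0_init_golden_registers() layers a
 * per-ASIC table on top of a family-wide one, then applies the common table.
 */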
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};
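
/*
 * Offsets of the eight RLC SRM index control address/data register pairs,
 * expressed relative to instance 0 of each register.
 */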
static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);

static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		if (!amdgpu_virt_support_skip_setting(adev)) {
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_0,
							ARRAY_SIZE(golden_settings_gc_9_0));
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_0_vg10,
							ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		}
		break;
	case CHIP_VEGA12:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case CHIP_VEGA20:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case CHIP_RAVEN:
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->rev_id >= 8)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	default:
		break;
	}

	soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
					(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}
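
/*
 * Emit a PM4 WRITE_DATA packet writing @val to register @reg from engine
 * @eng_sel; @wc requests a write confirmation before the packet completes.
 */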
static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
				 /* memory (1) or register (0) */
				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
				 WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}
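
/*
 * Basic ring smoke test: push a magic value into a scratch register through
 * the ring, then poll the register until the CP has performed the write.
 */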
static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r)
		return r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		goto error_free_scratch;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

error_free_scratch:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
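
/*
 * IB smoke test: submit a small indirect buffer that writes a magic value to
 * a writeback slot, wait on the resulting fence and verify the memory.
 */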
static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}

static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_1 *rlc_hdr;

	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}
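
/*
 * Record whether the loaded CP firmware is recent enough (per the ASIC
 * specific minimum versions below) to perform a register write followed by
 * a wait in firmware; callers fall back to separate packets otherwise.
 */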
static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
	adev->gfx.me_fw_write_wait = false;
	adev->gfx.mec_fw_write_wait = false;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000193) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_VEGA12:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000196) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_VEGA20:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000197) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_RAVEN:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000192) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	default:
		break;
	}
}

static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		break;
	case CHIP_RAVEN:
		/* Raven2 and Picasso keep GFXOFF; original Raven needs new enough RLC */
		if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
			break;
		if ((adev->gfx.rlc_fw_version < 531) ||
		    (adev->gfx.rlc_fw_version == 53815) ||
		    (adev->gfx.rlc_feature_version < 1) ||
		    !adev->gfx.rlc.is_rlc_v2_1)
			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
		break;
	default:
		break;
	}
}
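
/*
 * Fetch and validate all CP/RLC/MEC firmware images for the detected ASIC
 * and, when PSP-based loading is used, register them in the ucode list.
 */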
static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL;
	unsigned int i = 0;
	uint16_t version_major;
	uint16_t version_minor;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	case CHIP_VEGA12:
		chip_name = "vega12";
		break;
	case CHIP_VEGA20:
		chip_name = "vega20";
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 8)
			chip_name = "raven2";
		else if (adev->pdev->device == 0x15d8)
			chip_name = "picasso";
		else
			chip_name = "raven";
		break;
	default:
		BUG();
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
	 * instead of picasso_rlc.bin.
	 * Judgment method:
	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
	 *       or revision >= 0xD8 && revision <= 0xDF
	 * otherwise is PCO FP5
	 */
	if (!strcmp(chip_name, "picasso") &&
		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
	else
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;

	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
	if (version_major == 2 && version_minor == 1)
		adev->gfx.rlc.is_rlc_v2_1 = true;

	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->gfx.rlc.is_rlc_v2_1)
		gfx_v9_0_init_rlc_ext_microcode(adev);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
	if (!err) {
		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
		if (err)
			goto out;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			 adev->gfx.mec2_fw->data;
		adev->gfx.mec2_fw_version =
			le32_to_cpu(cp_hdr->header.ucode_version);
		adev->gfx.mec2_feature_version =
			le32_to_cpu(cp_hdr->ucode_feature_version);
	} else {
		/* MEC2 firmware is optional; run without it if unavailable */
		err = 0;
		adev->gfx.mec2_fw = NULL;
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		if (adev->gfx.rlc.is_rlc_v2_1 &&
		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
		}

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
		info->fw = adev->gfx.mec_fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
			info->fw = adev->gfx.mec2_fw;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
		}
	}

out:
	gfx_v9_0_check_if_need_gfxoff(adev);
	gfx_v9_0_check_fw_write_wait(adev);
	if (err) {
		dev_err(adev->dev,
			"gfx9: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}
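
/*
 * Size, in dwords, of the clear-state buffer built from gfx9_cs_data:
 * preamble + context control, one SET_CONTEXT_REG run per extent, plus the
 * end-clear-state and clear-state trailer packets.
 */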
static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}

	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}

static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
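
/*
 * Pick the CUs that stay powered per SE/SH: the first pg_always_on_cu_num
 * CUs feed RLC_PG_ALWAYS_ON_CU_MASK, the first always_on_cu_num (which
 * depends on APU/ASIC type) feed RLC_LB_ALWAYS_ACTIVE_CU_MASK.
 */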
static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
{
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	uint32_t pg_always_on_cu_num = 2;
	uint32_t always_on_cu_num;
	uint32_t i, j, k;
	uint32_t mask, cu_bitmap, counter;

	if (adev->flags & AMD_IS_APU)
		always_on_cu_num = 4;
	else if (adev->asic_type == CHIP_VEGA12)
		always_on_cu_num = 8;
	else
		always_on_cu_num = 12;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			cu_bitmap = 0;
			counter = 0;
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (cu_info->bitmap[i][j] & mask) {
					if (counter == pg_always_on_cu_num)
						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
					if (counter < always_on_cu_num)
						cu_bitmap |= mask;
					else
						break;
					counter++;
				}
				mask <<= 1;
			}

			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
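
/*
 * Configure RLC load balancing per watt (LBPW) for Raven: threshold and
 * sample-count registers plus the always-on CU masks set up above.
 */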
static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
{
	uint32_t data;

	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));

	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);

	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);

	mutex_lock(&adev->grbm_idx_mutex);
	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);

	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);

	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
	data &= 0x0000FFFF;
	data |= 0x00C00000;
	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);

	/*
	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
	 * programmed in gfx_v9_0_init_always_on_cu_mask()
	 */

	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
	 * but used for RLC_LB_CNTL configuration */
	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	gfx_v9_0_init_always_on_cu_mask(adev);
}

static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
{
	uint32_t data;

	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));

	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);

	/* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);

	mutex_lock(&adev->grbm_idx_mutex);
	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);

	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);

	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
	data &= 0x0000FFFF;
	data |= 0x00C00000;
	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);

	/*
	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
	 * programmed in gfx_v9_0_init_always_on_cu_mask()
	 */

	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
	 * but used for RLC_LB_CNTL configuration */
	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	gfx_v9_0_init_always_on_cu_mask(adev);
}

static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
}

static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
{
	return 4;
}

static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
{
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = gfx9_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* init clear state block */
		r = amdgpu_gfx_rlc_init_csb(adev);
		if (r)
			return r;
	}

	if (adev->asic_type == CHIP_RAVEN) {
		/* TODO: double check the cp_table_size for RV */
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_gfx_rlc_init_cpt(adev);
		if (r)
			return r;
	}

	switch (adev->asic_type) {
	case CHIP_RAVEN:
		gfx_v9_0_init_lbpw(adev);
		break;
	case CHIP_VEGA20:
		gfx_v9_4_init_lbpw(adev);
		break;
	default:
		break;
	}

	return 0;
}

static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
{
	int r;

	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
	if (unlikely(r != 0))
		return r;

	r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
			AMDGPU_GEM_DOMAIN_VRAM);
	if (!r)
		adev->gfx.rlc.clear_state_gpu_addr =
			amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);

	amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);

	return r;
}

static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
{
	int r;

	if (!adev->gfx.rlc.clear_state_obj)
		return;

	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
	if (likely(r == 0)) {
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}
}

static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
}

static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	const __le32 *fw_data;
	unsigned fw_size;
	u32 *fw;
	size_t mec_hpd_size;

	const struct gfx_firmware_header_v1_0 *mec_hdr;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);
	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;

	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
		gfx_v9_0_mec_fini(adev);
		return r;
	}

	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.mec_fw_obj,
				      &adev->gfx.mec.mec_fw_gpu_addr,
				      (void **)&fw);
	if (r) {
		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
		gfx_v9_0_mec_fini(adev);
		return r;
	}

	memcpy(fw, fw_data, fw_size);

	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);

	return 0;
}
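
/*
 * Wave debug helpers: read wave state via the SQ indirect interface, where
 * SQ_IND_INDEX selects SIMD/wave/register index and SQ_IND_DATA returns it.
 */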
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}

static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
			   uint32_t wave, uint32_t thread,
			   uint32_t regno, uint32_t num, uint32_t *out)
{
	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}

static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* type 1 wave data */
	dst[(*no_fields)++] = 1;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}

static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, 0,
		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}

static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t thread,
				     uint32_t start, uint32_t size,
				     uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, thread,
		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
}

static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
				      u32 me, u32 pipe, u32 q)
{
	soc15_grbm_select(adev, me, pipe, q, 0);
}

static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v9_0_select_se_sh,
	.read_wave_data = &gfx_v9_0_read_wave_data,
	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
};
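
/*
 * Derive the per-ASIC gfx config (FIFO sizes, GB_ADDR_CONFIG and the
 * fields decoded from it) before the rings are brought up.
 */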
static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	int err;

	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VEGA12:
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
		DRM_INFO("fix gfx.config for vega12\n");
		break;
	case CHIP_VEGA20:
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
		gb_addr_config &= ~0xf3e777ff;
		gb_addr_config |= 0x22014042;
		/* check vbios table if gpu info is not available */
		err = amdgpu_atomfirmware_get_gfx_info(adev);
		if (err)
			return err;
		break;
	case CHIP_RAVEN:
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		if (adev->rev_id >= 8)
			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
		else
			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		BUG();
		break;
	}

	adev->gfx.config.gb_addr_config = gb_addr_config;

	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_PIPES);

	adev->gfx.config.max_tile_pipes =
		adev->gfx.config.gb_addr_config_fields.num_pipes;

	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_BANKS);
	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					MAX_COMPRESSED_FRAGS);
	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_RB_PER_SE);
	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_SHADER_ENGINES);
	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					PIPE_INTERLEAVE_SIZE));

	return 0;
}

static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
				   struct amdgpu_ngg_buf *ngg_buf,
				   int size_se,
				   int default_size_se)
{
	int r;

	if (size_se < 0) {
		dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
		return -EINVAL;
	}
	size_se = size_se ? size_se : default_size_se;

	ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
	r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
				    &ngg_buf->bo,
				    &ngg_buf->gpu_addr,
				    NULL);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
		return r;
	}
	ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);

	return r;
}

static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < NGG_BUF_MAX; i++)
		amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
				      &adev->gfx.ngg.buf[i].gpu_addr,
				      NULL);

	memset(&adev->gfx.ngg.buf[0], 0,
			sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);

	adev->gfx.ngg.init = false;

	return 0;
}
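
/*
 * Set up NGG (next-generation geometry): reserve a slice of GDS plus
 * primitive/position/control-sideband (and optional parameter cache)
 * buffers in VRAM, one allocation per shader engine.
 */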
static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
{
	int r;

	if (!amdgpu_ngg || adev->gfx.ngg.init == true)
		return 0;

	/* GDS reserve memory: 64 bytes alignment */
	adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
	adev->gds.mem.total_size -= adev->gfx.ngg.gds_reserve_size;
	adev->gds.mem.gfx_partition_size -= adev->gfx.ngg.gds_reserve_size;
	adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
	adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);

	/* Primitive Buffer */
	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
				    amdgpu_prim_buf_per_se,
				    64 * 1024);
	if (r) {
		dev_err(adev->dev, "Failed to create Primitive Buffer\n");
		goto err;
	}

	/* Position Buffer */
	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
				    amdgpu_pos_buf_per_se,
				    256 * 1024);
	if (r) {
		dev_err(adev->dev, "Failed to create Position Buffer\n");
		goto err;
	}

	/* Control Sideband */
	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
				    amdgpu_cntl_sb_buf_per_se,
				    256);
	if (r) {
		dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
		goto err;
	}

	/* Parameter Cache, not created by default */
	if (amdgpu_param_buf_per_se <= 0)
		goto out;

	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
				    amdgpu_param_buf_per_se,
				    512 * 1024);
	if (r) {
		dev_err(adev->dev, "Failed to create Parameter Cache\n");
		goto err;
	}

out:
	adev->gfx.ngg.init = true;
	return 0;
err:
	gfx_v9_0_ngg_fini(adev);
	return r;
}

static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	int r;
	u32 data, base;

	if (!amdgpu_ngg)
		return 0;

	/* Program buffer size */
	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
			     adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
			     adev->gfx.ngg.buf[NGG_POS].size >> 8);
	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);

	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
			     adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
			     adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);

	/* Program buffer base address */
	base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);

	base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);

	base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
	data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);

	base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
	data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);

	base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);

	base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);

	/* Clear GDS reserved memory */
	r = amdgpu_ring_alloc(ring, 17);
	if (r) {
		DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
			  ring->name, r);
		return r;
	}

	gfx_v9_0_write_data_to_reg(ring, 0, false,
				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
				   (adev->gds.mem.total_size +
				    adev->gfx.ngg.gds_reserve_size));

	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
				PACKET3_DMA_DATA_DST_SEL(1) |
				PACKET3_DMA_DATA_SRC_SEL(2)));
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
				adev->gfx.ngg.gds_reserve_size);

	gfx_v9_0_write_data_to_reg(ring, 0, false,
				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);

	amdgpu_ring_commit(ring);

	return 0;
}

static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
				      int mec, int pipe, int queue)
{
	int r;
	unsigned irq_type;
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
				+ (ring_id * GFX9_MEC_HPD_SIZE);
	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		+ ring->pipe;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	r = amdgpu_ring_init(adev, ring, 1024,
			     &adev->gfx.eop_irq, irq_type);
	if (r)
		return r;

	return 0;
}
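
/*
 * sw_init: register interrupt sources, load firmware, then create the gfx
 * ring, the compute rings (allocated horizontally across pipes), the KIQ
 * ring and the MQD backing objects.
 */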
1630 static int gfx_v9_0_sw_init(void *handle)
1632 int i, j, k, r, ring_id;
1633 struct amdgpu_ring *ring;
1634 struct amdgpu_kiq *kiq;
1635 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1637 switch (adev->asic_type) {
1642 adev->gfx.mec.num_mec = 2;
1645 adev->gfx.mec.num_mec = 1;
1649 adev->gfx.mec.num_pipe_per_mec = 4;
1650 adev->gfx.mec.num_queue_per_pipe = 8;
1653 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
1657 /* Privileged reg */
1658 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
1659 &adev->gfx.priv_reg_irq);
1663 /* Privileged inst */
1664 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
1665 &adev->gfx.priv_inst_irq);
1670 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
1671 &adev->gfx.cp_ecc_error_irq);
1676 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
1677 &adev->gfx.cp_ecc_error_irq);
1681 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1683 gfx_v9_0_scratch_init(adev);
	r = gfx_v9_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = adev->gfx.rlc.funcs->init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v9_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}
1703 /* set up the gfx ring */
1704 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1705 ring = &adev->gfx.gfx_ring[i];
1706 ring->ring_obj = NULL;
		if (!i)
			sprintf(ring->name, "gfx");
		else
			sprintf(ring->name, "gfx_%d", i);
1711 ring->use_doorbell = true;
1712 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
1713 r = amdgpu_ring_init(adev, ring, 1024,
1714 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
1719 /* set up the compute queues - allocate horizontally across pipes */
1721 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1722 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1723 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;
1727 r = gfx_v9_0_compute_ring_init(adev,
	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}
1744 kiq = &adev->gfx.kiq;
1745 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	/* create MQD for all compute queues as well as KIQ for SRIOV case */
1750 r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
1754 adev->gfx.ce_ram_size = 0x8000;
1756 r = gfx_v9_0_gpu_early_init(adev);
1760 r = gfx_v9_0_ngg_init(adev);
1768 static int gfx_v9_0_sw_fini(void *handle)
1771 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1773 if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
1775 struct ras_common_if *ras_if = adev->gfx.ras_if;
1776 struct ras_ih_if ih_info = {
1780 amdgpu_ras_debugfs_remove(adev, ras_if);
1781 amdgpu_ras_sysfs_remove(adev, ras_if);
1782 amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
1783 amdgpu_ras_feature_enable(adev, ras_if, 0);
1787 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
1788 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
1789 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
1791 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1792 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1793 for (i = 0; i < adev->gfx.num_compute_rings; i++)
1794 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1796 amdgpu_gfx_compute_mqd_sw_fini(adev);
1797 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
1798 amdgpu_gfx_kiq_fini(adev);
1800 gfx_v9_0_mec_fini(adev);
1801 gfx_v9_0_ngg_fini(adev);
1802 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
1803 &adev->gfx.rlc.clear_state_gpu_addr,
1804 (void **)&adev->gfx.rlc.cs_ptr);
1805 if (adev->asic_type == CHIP_RAVEN) {
1806 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
1807 &adev->gfx.rlc.cp_table_gpu_addr,
1808 (void **)&adev->gfx.rlc.cp_table_ptr);
1810 gfx_v9_0_free_microcode(adev);
1816 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
1821 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
	if (instance == 0xffffffff)
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);

	if (se_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);

	if (sh_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
1840 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
1843 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1847 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
1848 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
1850 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1851 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1853 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1854 adev->gfx.config.max_sh_per_se);
1856 return (~data) & mask;
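	/*
	 * The CC/GC_USER backend-disable registers hold *disabled* RB bits,
	 * so the active-RB bitmap is the complement, trimmed to the per-SH
	 * width. E.g. with 4 RBs per SH and data = 0b0100, this returns
	 * 0b1011 (RB2 harvested, RB0/1/3 active).
	 */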
1859 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
1864 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1865 adev->gfx.config.max_sh_per_se;
1867 mutex_lock(&adev->grbm_idx_mutex);
1868 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1869 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1870 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1871 data = gfx_v9_0_get_rb_active_bitmap(adev);
1872 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1873 rb_bitmap_width_per_sh);
1876 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1877 mutex_unlock(&adev->grbm_idx_mutex);
1879 adev->gfx.config.backend_enable_mask = active_rbs;
1880 adev->gfx.config.num_rbs = hweight32(active_rbs);
1883 #define DEFAULT_SH_MEM_BASES (0x6000)
1884 #define FIRST_COMPUTE_VMID (8)
1885 #define LAST_COMPUTE_VMID (16)
1886 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
1889 uint32_t sh_mem_config;
1890 uint32_t sh_mem_bases;
1893 * Configure apertures:
1894 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
1895 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
1896 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
1898 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
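	/*
	 * Worked example: DEFAULT_SH_MEM_BASES = 0x6000 is packed into both
	 * 16-bit fields of SH_MEM_BASES (giving 0x60006000). Each field
	 * supplies address bits [63:48] (note the >> 48 in the per-vmid init
	 * above), so base 0x6000 places the apertures at
	 * 0x6000'0000'0000'0000, matching the layout in the comment above.
	 */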
1900 sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
1901 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
1902 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
1904 mutex_lock(&adev->srbm_mutex);
1905 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1906 soc15_grbm_select(adev, 0, 0, 0, i);
1907 /* CP and shaders */
1908 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
1909 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
1911 soc15_grbm_select(adev, 0, 0, 0, 0);
1912 mutex_unlock(&adev->srbm_mutex);
1915 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
1920 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
1922 gfx_v9_0_tiling_mode_table_init(adev);
1924 gfx_v9_0_setup_rb(adev);
1925 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
1926 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
1928 /* XXX SH_MEM regs */
1929 /* where to put LDS, scratch, GPUVM in FSA64 space */
1930 mutex_lock(&adev->srbm_mutex);
1931 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) {
1932 soc15_grbm_select(adev, 0, 0, 0, i);
1933 /* CP and shaders */
		if (i == 0) {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
			WREG32_SOC15(GC, 0, mmSH_MEM_BASES, 0);
		} else {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
					    (adev->gmc.private_aperture_start >> 48));
			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
					    (adev->gmc.shared_aperture_start >> 48));
			WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp);
		}
1950 soc15_grbm_select(adev, 0, 0, 0, 0);
1952 mutex_unlock(&adev->srbm_mutex);
1954 gfx_v9_0_init_compute_vmid(adev);
1956 mutex_lock(&adev->grbm_idx_mutex);
	 * making sure that the following register writes will be broadcast
	 * to all the shaders
1961 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1963 WREG32_SOC15_RLC(GC, 0, mmPA_SC_FIFO_SIZE,
1964 (adev->gfx.config.sc_prim_fifo_size_frontend <<
1965 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
1966 (adev->gfx.config.sc_prim_fifo_size_backend <<
1967 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
1968 (adev->gfx.config.sc_hiz_tile_fifo_size <<
1969 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
1970 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
1971 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
1972 mutex_unlock(&adev->grbm_idx_mutex);
1976 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
1981 mutex_lock(&adev->grbm_idx_mutex);
1982 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1983 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1984 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1985 for (k = 0; k < adev->usec_timeout; k++) {
1986 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
1990 if (k == adev->usec_timeout) {
1991 gfx_v9_0_select_se_sh(adev, 0xffffffff,
1992 0xffffffff, 0xffffffff);
1993 mutex_unlock(&adev->grbm_idx_mutex);
				DRM_INFO("Timeout waiting for RLC serdes %u,%u\n",
2000 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2001 mutex_unlock(&adev->grbm_idx_mutex);
2003 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2004 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2005 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2006 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2007 for (k = 0; k < adev->usec_timeout; k++) {
2008 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2014 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2017 u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2019 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2020 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2021 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2022 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2024 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2027 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2030 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2031 adev->gfx.rlc.clear_state_gpu_addr >> 32);
2032 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2033 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2034 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2035 adev->gfx.rlc.clear_state_size);
2038 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2039 int indirect_offset,
2041 int *unique_indirect_regs,
2042 int unique_indirect_reg_count,
2043 int *indirect_start_offsets,
2044 int *indirect_start_offsets_count,
2045 int max_start_offsets_count)
2049 for (; indirect_offset < list_size; indirect_offset++) {
2050 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2051 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2052 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2054 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2055 indirect_offset += 2;
			/* look for the matching index */
2058 for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2059 if (unique_indirect_regs[idx] ==
2060 register_list_format[indirect_offset] ||
2061 !unique_indirect_regs[idx])
2065 BUG_ON(idx >= unique_indirect_reg_count);
2067 if (!unique_indirect_regs[idx])
2068 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2075 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2077 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2078 int unique_indirect_reg_count = 0;
2080 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2081 int indirect_start_offsets_count = 0;
2087 u32 *register_list_format =
2088 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;
2091 memcpy(register_list_format, adev->gfx.rlc.register_list_format,
2092 adev->gfx.rlc.reg_list_format_size_bytes);
2094 /* setup unique_indirect_regs array and indirect_start_offsets array */
2095 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2096 gfx_v9_1_parse_ind_reg_list(register_list_format,
2097 adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2098 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2099 unique_indirect_regs,
2100 unique_indirect_reg_count,
2101 indirect_start_offsets,
2102 &indirect_start_offsets_count,
2103 ARRAY_SIZE(indirect_start_offsets));
2105 /* enable auto inc in case it is disabled */
2106 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2107 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2108 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2110 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2111 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2112 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2113 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2114 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2115 adev->gfx.rlc.register_restore[i]);
2117 /* load indirect register */
2118 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2119 adev->gfx.rlc.reg_list_format_start);
2121 /* direct register portion */
2122 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2123 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2124 register_list_format[i]);
2126 /* indirect register portion */
2127 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2128 if (register_list_format[i] == 0xFFFFFFFF) {
2129 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2133 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2134 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2136 for (j = 0; j < unique_indirect_reg_count; j++) {
2137 if (register_list_format[i] == unique_indirect_regs[j]) {
2138 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2143 BUG_ON(j >= unique_indirect_reg_count);
2148 /* set save/restore list size */
2149 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2150 list_size = list_size >> 1;
2151 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2152 adev->gfx.rlc.reg_restore_list_size);
2153 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
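	/*
	 * The save/restore list is presumably stored as (offset, value)
	 * pairs, which would be why the dword count
	 * (reg_list_size_bytes >> 2) is halved above before being written
	 * as the list size.
	 */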
2155 /* write the starting offsets to RLC scratch ram */
2156 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2157 adev->gfx.rlc.starting_offsets_start);
2158 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2159 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2160 indirect_start_offsets[i]);
	/* load unique indirect regs */
2163 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2164 if (unique_indirect_regs[i] != 0) {
2165 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2166 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2167 unique_indirect_regs[i] & 0x3FFFF);
2169 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2170 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2171 unique_indirect_regs[i] >> 20);
2175 kfree(register_list_format);
2179 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2181 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2184 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2188 uint32_t default_data = 0;
2190 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
	if (enable) {
		/* enable GFXIP control over CGPG */
		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
		if (default_data != data)
			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
		if (default_data != data)
			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
		/* restore GFXIP control over CGPG */
		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
		if (default_data != data)
			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2210 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2214 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2215 AMD_PG_SUPPORT_GFX_SMG |
2216 AMD_PG_SUPPORT_GFX_DMG)) {
2217 /* init IDLE_POLL_COUNT = 60 */
2218 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2219 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2220 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2221 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2223 /* init RLC PG Delay */
		data = 0;
		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2226 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2227 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2228 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2229 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2231 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2232 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2233 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2234 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2236 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2237 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2238 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2239 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2241 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2242 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2244 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2245 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2246 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2248 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2252 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2256 uint32_t default_data = 0;
2258 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2259 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2260 SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2262 if (default_data != data)
2263 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2266 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2270 uint32_t default_data = 0;
2272 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2273 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2274 SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2276 if(default_data != data)
2277 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2280 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2284 uint32_t default_data = 0;
2286 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2287 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2290 if(default_data != data)
2291 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2294 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2297 uint32_t data, default_data;
2299 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2300 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2301 GFX_POWER_GATING_ENABLE,
2303 if(default_data != data)
2304 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2307 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2310 uint32_t data, default_data;
2312 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2313 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2314 GFX_PIPELINE_PG_ENABLE,
2316 if(default_data != data)
2317 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2320 /* read any GFX register to wake up GFX */
2321 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2324 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2327 uint32_t data, default_data;
2329 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2330 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2331 STATIC_PER_CU_PG_ENABLE,
2333 if(default_data != data)
2334 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2337 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2340 uint32_t data, default_data;
2342 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2343 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2344 DYN_PER_CU_PG_ENABLE,
2346 if(default_data != data)
2347 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2350 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2352 gfx_v9_0_init_csb(adev);
	 * The RLC save/restore list is supported since RLC v2_1,
	 * and it is needed by the gfxoff feature.
2358 if (adev->gfx.rlc.is_rlc_v2_1) {
2359 gfx_v9_1_init_rlc_save_restore_list(adev);
2360 gfx_v9_0_enable_save_restore_machine(adev);
2363 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2364 AMD_PG_SUPPORT_GFX_SMG |
2365 AMD_PG_SUPPORT_GFX_DMG |
2367 AMD_PG_SUPPORT_GDS |
2368 AMD_PG_SUPPORT_RLC_SMU_HS)) {
2369 WREG32(mmRLC_JUMP_TABLE_RESTORE,
2370 adev->gfx.rlc.cp_table_gpu_addr >> 8);
2371 gfx_v9_0_init_gfx_power_gating(adev);
static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2377 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2378 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2379 gfx_v9_0_wait_for_rlc_serdes(adev);
2382 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2384 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2386 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2390 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2392 #ifdef AMDGPU_RLC_DEBUG_RETRY
2396 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
	/* carrizo enables the cp interrupt only after cp is initialized */
2400 if (!(adev->flags & AMD_IS_APU)) {
2401 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2405 #ifdef AMDGPU_RLC_DEBUG_RETRY
2406 /* RLC_GPM_GENERAL_6 : RLC Ucode version */
2407 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
	if (rlc_ucode_ver == 0x108) {
		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i \n",
			 rlc_ucode_ver, adev->gfx.rlc_fw_version);
2411 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2412 * default is 0x9C4 to create a 100us interval */
2413 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
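		/*
		 * 0x9C4 is 2500 decimal; 2500 cycles per 100us implies the
		 * RefCLK here ticks at 25 MHz.
		 */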
2414 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
		 * to disable the page fault retry interrupts, default is
		 * 0x100 */
2417 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2422 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2424 const struct rlc_firmware_header_v2_0 *hdr;
2425 const __le32 *fw_data;
2426 unsigned i, fw_size;
	if (!adev->gfx.rlc_fw)
		return -EINVAL;
2431 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2432 amdgpu_ucode_print_rlc_hdr(&hdr->header);
2434 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2435 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2436 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2438 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2439 RLCG_UCODE_LOADING_START_ADDRESS);
2440 for (i = 0; i < fw_size; i++)
2441 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2442 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2447 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2451 if (amdgpu_sriov_vf(adev)) {
2452 gfx_v9_0_init_csb(adev);
2456 adev->gfx.rlc.funcs->stop(adev);
2459 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2461 gfx_v9_0_init_pg(adev);
2463 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2464 /* legacy rlc firmware loading */
2465 r = gfx_v9_0_rlc_load_microcode(adev);
2470 switch (adev->asic_type) {
2472 if (amdgpu_lbpw == 0)
2473 gfx_v9_0_enable_lbpw(adev, false);
2475 gfx_v9_0_enable_lbpw(adev, true);
2478 if (amdgpu_lbpw > 0)
2479 gfx_v9_0_enable_lbpw(adev, true);
2481 gfx_v9_0_enable_lbpw(adev, false);
2487 adev->gfx.rlc.funcs->start(adev);
2492 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2495 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2497 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2498 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2499 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
	if (!enable) {
		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
			adev->gfx.gfx_ring[i].sched.ready = false;
	}
2504 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2508 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2510 const struct gfx_firmware_header_v1_0 *pfp_hdr;
2511 const struct gfx_firmware_header_v1_0 *ce_hdr;
2512 const struct gfx_firmware_header_v1_0 *me_hdr;
2513 const __le32 *fw_data;
2514 unsigned i, fw_size;
	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;
2519 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2520 adev->gfx.pfp_fw->data;
2521 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2522 adev->gfx.ce_fw->data;
2523 me_hdr = (const struct gfx_firmware_header_v1_0 *)
2524 adev->gfx.me_fw->data;
2526 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2527 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2528 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2530 gfx_v9_0_cp_gfx_enable(adev, false);
2533 fw_data = (const __le32 *)
2534 (adev->gfx.pfp_fw->data +
2535 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2536 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2537 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
2538 for (i = 0; i < fw_size; i++)
2539 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2540 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2543 fw_data = (const __le32 *)
2544 (adev->gfx.ce_fw->data +
2545 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2546 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2547 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
2548 for (i = 0; i < fw_size; i++)
2549 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2550 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2553 fw_data = (const __le32 *)
2554 (adev->gfx.me_fw->data +
2555 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2556 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2557 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
2558 for (i = 0; i < fw_size; i++)
2559 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2560 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2565 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
2567 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2568 const struct cs_section_def *sect = NULL;
2569 const struct cs_extent_def *ext = NULL;
2573 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2574 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
2576 gfx_v9_0_cp_gfx_enable(adev, true);
	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}
2584 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2585 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2587 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2588 amdgpu_ring_write(ring, 0x80000000);
2589 amdgpu_ring_write(ring, 0x80000000);
2591 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
2592 for (ext = sect->section; ext->extent != NULL; ++ext) {
2593 if (sect->id == SECT_CONTEXT) {
2594 amdgpu_ring_write(ring,
2595 PACKET3(PACKET3_SET_CONTEXT_REG,
2597 amdgpu_ring_write(ring,
2598 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2599 for (i = 0; i < ext->reg_count; i++)
2600 amdgpu_ring_write(ring, ext->extent[i]);
2605 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2606 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2608 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2609 amdgpu_ring_write(ring, 0);
2611 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2612 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2613 amdgpu_ring_write(ring, 0x8000);
2614 amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2617 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
2618 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
2619 amdgpu_ring_write(ring, tmp);
2620 amdgpu_ring_write(ring, 0);
2622 amdgpu_ring_commit(ring);
2627 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
2629 struct amdgpu_ring *ring;
2632 u64 rb_addr, rptr_addr, wptr_gpu_addr;
2634 /* Set the write pointer delay */
2635 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
2637 /* set the RB to use vmid 0 */
2638 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
2640 /* Set ring buffer size */
2641 ring = &adev->gfx.gfx_ring[0];
2642 rb_bufsz = order_base_2(ring->ring_size / 8);
2643 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
2644 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
2646 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
2648 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
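	/*
	 * RB_BUFSZ appears to encode the ring size as 2^(RB_BUFSZ+1) dwords:
	 * e.g. a 64 KiB ring gives order_base_2(65536 / 8) = 13, and
	 * 2^(13+1) = 16384 dwords = 64 KiB, so the /8 above folds the
	 * bytes-to-dwords conversion and the -1 bias into one step.
	 */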
2650 /* Initialize the ring buffer's write pointers */
	ring->wptr = 0;
	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
2653 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
	/* set the wb address whether it's enabled or not */
2656 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2657 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
2658 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
2660 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2661 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
2662 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
2665 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2667 rb_addr = ring->gpu_addr >> 8;
2668 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
2669 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
2671 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
2672 if (ring->use_doorbell) {
2673 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2674 DOORBELL_OFFSET, ring->doorbell_index);
2675 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2678 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
2680 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
2682 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
2683 DOORBELL_RANGE_LOWER, ring->doorbell_index);
2684 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
2686 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
2687 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
2690 /* start the ring */
2691 gfx_v9_0_cp_gfx_start(adev);
2692 ring->sched.ready = true;
2697 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
	if (enable) {
		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
	} else {
		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
		for (i = 0; i < adev->gfx.num_compute_rings; i++)
			adev->gfx.compute_ring[i].sched.ready = false;
		adev->gfx.kiq.ring.sched.ready = false;
	}
2713 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
2715 const struct gfx_firmware_header_v1_0 *mec_hdr;
2716 const __le32 *fw_data;
	if (!adev->gfx.mec_fw)
		return -EINVAL;
2723 gfx_v9_0_cp_compute_enable(adev, false);
2725 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2726 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
2728 fw_data = (const __le32 *)
2729 (adev->gfx.mec_fw->data +
2730 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	tmp = REG_SET_FIELD(0, CP_CPC_IC_BASE_CNTL, VMID, 0);
2733 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2734 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
2736 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
2737 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
2738 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
2739 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
2742 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2743 mec_hdr->jt_offset);
2744 for (i = 0; i < mec_hdr->jt_size; i++)
2745 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
2746 le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
2748 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2749 adev->gfx.mec_fw_version);
	/* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
2756 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
2759 struct amdgpu_device *adev = ring->adev;
	/* tell RLC which queue is the KIQ */
2762 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
2764 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
2765 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2767 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2770 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
2772 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
2773 uint64_t queue_mask = 0;
2776 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
2777 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
2780 /* This situation may be hit in the future if a new HW
2781 * generation exposes more than 64 queues. If so, the
2782 * definition of queue_mask needs updating */
2783 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
2784 DRM_ERROR("Invalid KCQ enabled: %d\n", i);
2788 queue_mask |= (1ull << i);
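		/*
		 * Queues enabled in queue_bitmap accumulate here: e.g. with
		 * bits 0-7 set (the eight queues of MEC1 pipe 0 under the
		 * usual mec/pipe/queue bit layout), queue_mask ends up 0xff.
		 */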
	r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		return r;
	}
2798 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
2799 amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
2800 PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
2801 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
2802 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
2803 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
2804 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
2805 amdgpu_ring_write(kiq_ring, 0); /* oac mask */
2806 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
2807 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2808 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
2809 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
2810 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2812 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
		amdgpu_ring_write(kiq_ring,
2815 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
2816 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
2817 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
2818 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
2819 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
2820 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
2821 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
2822 PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
2823 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
2824 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
2825 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
2826 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
2827 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
2828 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
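	/*
	 * Dword accounting: the SET_RESOURCES packet above is 8 dwords
	 * (header + 7) and each MAP_QUEUES packet is 7 dwords (header + 6),
	 * which is exactly the (7 * num_compute_rings) + 8 budget passed to
	 * amdgpu_ring_alloc() at the top of this function.
	 */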
	r = amdgpu_ring_test_helper(kiq_ring);
	if (r)
		DRM_ERROR("KCQ enable failed\n");
2838 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
2840 struct amdgpu_device *adev = ring->adev;
2841 struct v9_mqd *mqd = ring->mqd_ptr;
2842 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
2845 mqd->header = 0xC0310800;
2846 mqd->compute_pipelinestat_enable = 0x00000001;
2847 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
2848 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
2849 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
2850 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
2851 mqd->compute_misc_reserved = 0x00000003;
2853 mqd->dynamic_cu_mask_addr_lo =
2854 lower_32_bits(ring->mqd_gpu_addr
2855 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2856 mqd->dynamic_cu_mask_addr_hi =
2857 upper_32_bits(ring->mqd_gpu_addr
2858 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2860 eop_base_addr = ring->eop_gpu_addr >> 8;
2861 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
2862 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
2864 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2865 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
2866 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
2867 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
2869 mqd->cp_hqd_eop_control = tmp;
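	/*
	 * Worked example: GFX9_MEC_HPD_SIZE = 4096 bytes = 1024 dwords, so
	 * the field value is order_base_2(1024) - 1 = 9, and the hardware
	 * expands that to 2^(9+1) = 1024 dwords, i.e. the full 4 KiB EOP
	 * buffer.
	 */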
2871 /* enable doorbell? */
2872 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2874 if (ring->use_doorbell) {
2875 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2876 DOORBELL_OFFSET, ring->doorbell_index);
2877 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2879 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2880 DOORBELL_SOURCE, 0);
2881 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2884 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2888 mqd->cp_hqd_pq_doorbell_control = tmp;
2890 /* disable the queue if it's active */
2892 mqd->cp_hqd_dequeue_request = 0;
2893 mqd->cp_hqd_pq_rptr = 0;
2894 mqd->cp_hqd_pq_wptr_lo = 0;
2895 mqd->cp_hqd_pq_wptr_hi = 0;
2897 /* set the pointer to the MQD */
2898 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
2899 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
2901 /* set MQD vmid to 0 */
2902 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
2903 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
2904 mqd->cp_mqd_control = tmp;
	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
2907 hqd_gpu_addr = ring->gpu_addr >> 8;
2908 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
2909 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
2911 /* set up the HQD, this is similar to CP_RB0_CNTL */
2912 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
2913 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
2914 (order_base_2(ring->ring_size / 4) - 1));
2915 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
2916 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
2918 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
2920 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
2921 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
2922 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
2923 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
2924 mqd->cp_hqd_pq_control = tmp;
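	/*
	 * Worked example for the two size fields above: a 64 KiB ring is
	 * 16384 dwords, so QUEUE_SIZE = order_base_2(16384) - 1 = 13 and the
	 * HQD sees 2^(13+1) dwords. Assuming a 4 KiB AMDGPU_GPU_PAGE_SIZE,
	 * RPTR_BLOCK_SIZE is likewise order_base_2(1024) - 1 = 9, placed at
	 * bit 8 of the field value by the << 8.
	 */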
2926 /* set the wb address whether it's enabled or not */
2927 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2928 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
2929 mqd->cp_hqd_pq_rptr_report_addr_hi =
2930 upper_32_bits(wb_gpu_addr) & 0xffff;
2932 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
2933 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2934 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
2935 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
2938 /* enable the doorbell if requested */
2939 if (ring->use_doorbell) {
2940 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2941 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2942 DOORBELL_OFFSET, ring->doorbell_index);
2944 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2946 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2947 DOORBELL_SOURCE, 0);
2948 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2952 mqd->cp_hqd_pq_doorbell_control = tmp;
2954 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2956 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
2958 /* set the vmid for the queue */
2959 mqd->cp_hqd_vmid = 0;
2961 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
2962 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
2963 mqd->cp_hqd_persistent_state = tmp;
2965 /* set MIN_IB_AVAIL_SIZE */
2966 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
2967 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
2968 mqd->cp_hqd_ib_control = tmp;
2970 /* activate the queue */
2971 mqd->cp_hqd_active = 1;
2976 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
2978 struct amdgpu_device *adev = ring->adev;
2979 struct v9_mqd *mqd = ring->mqd_ptr;
2982 /* disable wptr polling */
2983 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
2985 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
2986 mqd->cp_hqd_eop_base_addr_lo);
2987 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
2988 mqd->cp_hqd_eop_base_addr_hi);
2990 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2991 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
2992 mqd->cp_hqd_eop_control);
2994 /* enable doorbell? */
2995 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
2996 mqd->cp_hqd_pq_doorbell_control);
2998 /* disable the queue if it's active */
2999 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3000 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3001 for (j = 0; j < adev->usec_timeout; j++) {
3002 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3006 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3007 mqd->cp_hqd_dequeue_request);
3008 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3009 mqd->cp_hqd_pq_rptr);
3010 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3011 mqd->cp_hqd_pq_wptr_lo);
3012 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3013 mqd->cp_hqd_pq_wptr_hi);
3016 /* set the pointer to the MQD */
3017 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3018 mqd->cp_mqd_base_addr_lo);
3019 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3020 mqd->cp_mqd_base_addr_hi);
3022 /* set MQD vmid to 0 */
3023 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3024 mqd->cp_mqd_control);
	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3027 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3028 mqd->cp_hqd_pq_base_lo);
3029 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3030 mqd->cp_hqd_pq_base_hi);
3032 /* set up the HQD, this is similar to CP_RB0_CNTL */
3033 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3034 mqd->cp_hqd_pq_control);
3036 /* set the wb address whether it's enabled or not */
3037 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3038 mqd->cp_hqd_pq_rptr_report_addr_lo);
3039 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3040 mqd->cp_hqd_pq_rptr_report_addr_hi);
3042 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3043 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3044 mqd->cp_hqd_pq_wptr_poll_addr_lo);
3045 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3046 mqd->cp_hqd_pq_wptr_poll_addr_hi);
3048 /* enable the doorbell if requested */
3049 if (ring->use_doorbell) {
3050 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3051 (adev->doorbell_index.kiq * 2) << 2);
3052 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3053 (adev->doorbell_index.userqueue_end * 2) << 2);
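		/*
		 * The doorbell range registers appear to take byte offsets:
		 * the * 2 converts a 64-bit doorbell index to dwords, and
		 * the << 2 converts dwords to bytes.
		 */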
3056 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3057 mqd->cp_hqd_pq_doorbell_control);
3059 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3060 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3061 mqd->cp_hqd_pq_wptr_lo);
3062 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3063 mqd->cp_hqd_pq_wptr_hi);
3065 /* set the vmid for the queue */
3066 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3068 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3069 mqd->cp_hqd_persistent_state);
3071 /* activate the queue */
3072 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3073 mqd->cp_hqd_active);
3075 if (ring->use_doorbell)
3076 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3081 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3083 struct amdgpu_device *adev = ring->adev;
3086 /* disable the queue if it's active */
3087 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3089 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3091 for (j = 0; j < adev->usec_timeout; j++) {
3092 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
		if (j == adev->usec_timeout) {
3098 DRM_DEBUG("KIQ dequeue request failed.\n");
3100 /* Manual disable if dequeue request times out */
3101 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3104 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3108 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3109 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3110 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3111 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3112 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3113 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3114 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3115 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3120 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3122 struct amdgpu_device *adev = ring->adev;
3123 struct v9_mqd *mqd = ring->mqd_ptr;
3124 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3126 gfx_v9_0_kiq_setting(ring);
3128 if (adev->in_gpu_reset) { /* for GPU_RESET case */
3129 /* reset MQD to a clean status */
3130 if (adev->gfx.mec.mqd_backup[mqd_idx])
3131 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
3137 mutex_lock(&adev->srbm_mutex);
3138 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3139 gfx_v9_0_kiq_init_register(ring);
3140 soc15_grbm_select(adev, 0, 0, 0, 0);
3141 mutex_unlock(&adev->srbm_mutex);
3143 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3144 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3145 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3146 mutex_lock(&adev->srbm_mutex);
3147 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3148 gfx_v9_0_mqd_init(ring);
3149 gfx_v9_0_kiq_init_register(ring);
3150 soc15_grbm_select(adev, 0, 0, 0, 0);
3151 mutex_unlock(&adev->srbm_mutex);
3153 if (adev->gfx.mec.mqd_backup[mqd_idx])
3154 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3160 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3162 struct amdgpu_device *adev = ring->adev;
3163 struct v9_mqd *mqd = ring->mqd_ptr;
3164 int mqd_idx = ring - &adev->gfx.compute_ring[0];
3166 if (!adev->in_gpu_reset && !adev->in_suspend) {
3167 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3168 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3169 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3170 mutex_lock(&adev->srbm_mutex);
3171 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3172 gfx_v9_0_mqd_init(ring);
3173 soc15_grbm_select(adev, 0, 0, 0, 0);
3174 mutex_unlock(&adev->srbm_mutex);
3176 if (adev->gfx.mec.mqd_backup[mqd_idx])
3177 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3178 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3179 /* reset MQD to a clean status */
3180 if (adev->gfx.mec.mqd_backup[mqd_idx])
3181 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
	} else {
		amdgpu_ring_clear_ring(ring);
	}
3193 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3195 struct amdgpu_ring *ring;
3198 ring = &adev->gfx.kiq.ring;
3200 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3201 if (unlikely(r != 0))
3204 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3205 if (unlikely(r != 0))
3208 gfx_v9_0_kiq_init_queue(ring);
3209 amdgpu_bo_kunmap(ring->mqd_obj);
3210 ring->mqd_ptr = NULL;
3211 amdgpu_bo_unreserve(ring->mqd_obj);
3212 ring->sched.ready = true;
3216 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3218 struct amdgpu_ring *ring = NULL;
3221 gfx_v9_0_cp_compute_enable(adev, true);
3223 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3224 ring = &adev->gfx.compute_ring[i];
3226 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3227 if (unlikely(r != 0))
3229 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3231 r = gfx_v9_0_kcq_init_queue(ring);
3232 amdgpu_bo_kunmap(ring->mqd_obj);
3233 ring->mqd_ptr = NULL;
3235 amdgpu_bo_unreserve(ring->mqd_obj);
3240 r = gfx_v9_0_kiq_kcq_enable(adev);
3245 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3248 struct amdgpu_ring *ring;
3250 if (!(adev->flags & AMD_IS_APU))
3251 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3253 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3254 /* legacy firmware loading */
3255 r = gfx_v9_0_cp_gfx_load_microcode(adev);
3259 r = gfx_v9_0_cp_compute_load_microcode(adev);
3264 r = gfx_v9_0_kiq_resume(adev);
3268 r = gfx_v9_0_cp_gfx_resume(adev);
3272 r = gfx_v9_0_kcq_resume(adev);
3276 ring = &adev->gfx.gfx_ring[0];
3277 r = amdgpu_ring_test_helper(ring);
3281 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3282 ring = &adev->gfx.compute_ring[i];
3283 amdgpu_ring_test_helper(ring);
3286 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3291 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3293 gfx_v9_0_cp_gfx_enable(adev, enable);
3294 gfx_v9_0_cp_compute_enable(adev, enable);
3297 static int gfx_v9_0_hw_init(void *handle)
3300 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3302 gfx_v9_0_init_golden_registers(adev);
3304 gfx_v9_0_constants_init(adev);
3306 r = gfx_v9_0_csb_vram_pin(adev);
3310 r = adev->gfx.rlc.funcs->resume(adev);
3314 r = gfx_v9_0_cp_resume(adev);
3318 r = gfx_v9_0_ngg_en(adev);
3325 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3328 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
	if (r)
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3334 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3335 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3337 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3338 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3339 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3340 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3341 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3342 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3343 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3344 amdgpu_ring_write(kiq_ring, 0);
3345 amdgpu_ring_write(kiq_ring, 0);
3346 amdgpu_ring_write(kiq_ring, 0);
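		/*
		 * Each UNMAP_QUEUES packet is 6 dwords (header + 5), matching
		 * the 6 * num_compute_rings budget reserved above.
		 */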
	r = amdgpu_ring_test_helper(kiq_ring);
	if (r)
		DRM_ERROR("KCQ disable failed\n");
3355 static int gfx_v9_0_hw_fini(void *handle)
3357 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3359 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3360 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3361 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	/* disable KCQ so the CPC stops touching memory that is no longer valid */
3364 gfx_v9_0_kcq_disable(adev);
3366 if (amdgpu_sriov_vf(adev)) {
3367 gfx_v9_0_cp_gfx_enable(adev, false);
		/* must disable polling for SRIOV when hw finished, otherwise
		 * the CPC engine may keep fetching a WB address which is already
		 * invalid after sw finished and trigger a DMAR read error in
3373 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
	/* Use the deinitialize sequence from CAIL when unbinding the device from the driver,
	 * otherwise the KIQ hangs when binding back
3380 if (!adev->in_gpu_reset && !adev->in_suspend) {
3381 mutex_lock(&adev->srbm_mutex);
3382 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3383 adev->gfx.kiq.ring.pipe,
3384 adev->gfx.kiq.ring.queue, 0);
3385 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3386 soc15_grbm_select(adev, 0, 0, 0, 0);
3387 mutex_unlock(&adev->srbm_mutex);
3390 gfx_v9_0_cp_enable(adev, false);
3391 adev->gfx.rlc.funcs->stop(adev);
3393 gfx_v9_0_csb_vram_unpin(adev);
3398 static int gfx_v9_0_suspend(void *handle)
3400 return gfx_v9_0_hw_fini(handle);
3403 static int gfx_v9_0_resume(void *handle)
3405 return gfx_v9_0_hw_init(handle);
3408 static bool gfx_v9_0_is_idle(void *handle)
3410 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3412 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3413 GRBM_STATUS, GUI_ACTIVE))
3419 static int gfx_v9_0_wait_for_idle(void *handle)
3422 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3424 for (i = 0; i < adev->usec_timeout; i++) {
3425 if (gfx_v9_0_is_idle(handle))
3432 static int gfx_v9_0_soft_reset(void *handle)
3434 u32 grbm_soft_reset = 0;
3436 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3439 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3440 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3441 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3442 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3443 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3444 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3445 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3446 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3447 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3448 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3449 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3452 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3453 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3454 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3458 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3459 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3460 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3461 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3464 if (grbm_soft_reset) {
3466 adev->gfx.rlc.funcs->stop(adev);
3468 /* Disable GFX parsing/prefetching */
3469 gfx_v9_0_cp_gfx_enable(adev, false);
3471 /* Disable MEC parsing/prefetching */
3472 gfx_v9_0_cp_compute_enable(adev, false);
3474 if (grbm_soft_reset) {
3475 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3476 tmp |= grbm_soft_reset;
3477 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3478 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3479 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3483 tmp &= ~grbm_soft_reset;
3484 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3485 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3488 /* Wait a little for things to settle down */
3494 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3498 mutex_lock(&adev->gfx.gpu_clock_mutex);
3499 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
3500 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
3501 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3502 mutex_unlock(&adev->gfx.gpu_clock_mutex);
3506 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3508 uint32_t gds_base, uint32_t gds_size,
3509 uint32_t gws_base, uint32_t gws_size,
3510 uint32_t oa_base, uint32_t oa_size)
3512 struct amdgpu_device *adev = ring->adev;
3515 gfx_v9_0_write_data_to_reg(ring, 0, false,
3516 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
3520 gfx_v9_0_write_data_to_reg(ring, 0, false,
3521 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
3525 gfx_v9_0_write_data_to_reg(ring, 0, false,
3526 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
3527 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
3530 gfx_v9_0_write_data_to_reg(ring, 0, false,
3531 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
3532 (1 << (oa_size + oa_base)) - (1 << oa_base));
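	/*
	 * The OA mask arithmetic above sets oa_size consecutive bits starting
	 * at oa_base: e.g. oa_base = 4, oa_size = 4 gives
	 * (1 << 8) - (1 << 4) = 0xf0.
	 */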
3535 static const u32 vgpr_init_compute_shader[] =
3537 0xb07c0000, 0xbe8000ff,
3538 0x000000f8, 0xbf110800,
3539 0x7e000280, 0x7e020280,
3540 0x7e040280, 0x7e060280,
3541 0x7e080280, 0x7e0a0280,
3542 0x7e0c0280, 0x7e0e0280,
3543 0x80808800, 0xbe803200,
3544 0xbf84fff5, 0xbf9c0000,
3545 0xd28c0001, 0x0001007f,
3546 0xd28d0001, 0x0002027e,
3547 0x10020288, 0xb8810904,
3548 0xb7814000, 0xd1196a01,
3549 0x00000301, 0xbe800087,
3550 0xbefc00c1, 0xd89c4000,
3551 0x00020201, 0xd89cc080,
3552 0x00040401, 0x320202ff,
3553 0x00000800, 0x80808100,
3554 0xbf84fff8, 0x7e020280,
3555 0xbf810000, 0x00000000,
3558 static const u32 sgpr_init_compute_shader[] =
3560 0xb07c0000, 0xbe8000ff,
3561 0x0000005f, 0xbee50080,
3562 0xbe812c65, 0xbe822c65,
3563 0xbe832c65, 0xbe842c65,
3564 0xbe852c65, 0xb77c0005,
3565 0x80808500, 0xbf84fff8,
3566 0xbe800080, 0xbf810000,
3569 static const struct soc15_reg_entry vgpr_init_regs[] = {
3570 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3571 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3572 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3573 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3574 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3575 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3576 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3577 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
3579 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
3582 static const struct soc15_reg_entry sgpr_init_regs[] = {
3583 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3584 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3585 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3586 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3587 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3588 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3589 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3590 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3591 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 SGPRs) */
3592 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
3595 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
3596 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT) },
3597 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT) },
3598 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT) },
3599 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT) },
3600 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT) },
3601 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT) },
3602 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT) },
3603 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT) },
3604 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT) },
3605 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT) },
3606 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT) },
3607 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED) },
3608 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT) },
3609 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT) },
3610 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT) },
3611 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO) },
3612 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT) },
3613 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT) },
3614 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT) },
3615 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT) },
3616 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT) },
3617 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2) },
3618 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT) },
3619 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT) },
3620 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT) },
3621 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT) },
3622 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT) },
3623 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2) },
3624 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT) },
3625 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2) },
3626 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT) },
3629 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
3631 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
3632 struct amdgpu_ib ib;
3633 struct dma_fence *f = NULL;
3636 unsigned total_size, vgpr_offset, sgpr_offset;
3639 /* only support when RAS is enabled */
3640 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
3643 /* bail if the compute ring is not ready */
3644 if (!ring->sched.ready)
3647 tmp = RREG32_SOC15(GC, 0, mmGB_EDC_MODE);
3648 WREG32_SOC15(GC, 0, mmGB_EDC_MODE, 0);
3650 total_size =
3651 ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3652 total_size +=
3653 ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3654 total_size = ALIGN(total_size, 256);
3655 vgpr_offset = total_size;
3656 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
3657 sgpr_offset = total_size;
3658 total_size += sizeof(sgpr_init_compute_shader);
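/* each "(ARRAY_SIZE(...) * 3) + 4 + 5 + 2" term above sizes one dispatch
 * in dwords: a 3-dword SET_SH_REG per register entry, a 4-dword
 * SET_SH_REG for COMPUTE_PGM_LO/HI, a 5-dword DISPATCH_DIRECT and a
 * 2-dword EVENT_WRITE (CS partial flush); the trailing "* 4" converts
 * dwords to bytes before the shader payloads are appended.
 */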
3660 /* allocate an indirect buffer to put the commands in */
3661 memset(&ib, 0, sizeof(ib));
3662 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
3664 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
3668 /* load the compute shaders */
3669 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
3670 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
3672 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
3673 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
3675 /* init the ib length to 0 */
3679 /* write the register state for the compute dispatch */
3680 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
3681 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3682 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
3683 - PACKET3_SET_SH_REG_START;
3684 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
3686 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3687 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
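/* COMPUTE_PGM_LO/HI take a 256-byte aligned shader address, hence the
 * GPU VA is shifted right by 8 bits here.
 */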
3688 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3689 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3690 - PACKET3_SET_SH_REG_START;
3691 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3692 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3694 /* write dispatch packet */
3695 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3696 ib.ptr[ib.length_dw++] = 128; /* x */
3697 ib.ptr[ib.length_dw++] = 1; /* y */
3698 ib.ptr[ib.length_dw++] = 1; /* z */
3699 ib.ptr[ib.length_dw++] =
3700 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3702 /* write CS partial flush packet */
3703 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3704 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
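/* event type 7 is CS_PARTIAL_FLUSH: it drains the VGPR-init dispatch
 * before the SGPR pass is programmed below.
 */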
3707 /* write the register state for the compute dispatch */
3708 for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
3709 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3710 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
3711 - PACKET3_SET_SH_REG_START;
3712 ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
3714 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3715 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
3716 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3717 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3718 - PACKET3_SET_SH_REG_START;
3719 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3720 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3722 /* write dispatch packet */
3723 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3724 ib.ptr[ib.length_dw++] = 128; /* x */
3725 ib.ptr[ib.length_dw++] = 1; /* y */
3726 ib.ptr[ib.length_dw++] = 1; /* z */
3727 ib.ptr[ib.length_dw++] =
3728 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3730 /* write CS partial flush packet */
3731 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3732 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3734 /* schedule the ib on the ring */
3735 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
3737 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
3741 /* wait for the GPU to finish processing the IB */
3742 r = dma_fence_wait(f, false);
3744 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
3748 /* read back registers to clear the counters */
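/* (these EDC counters are cleared by reading them; the 0xe0000000
 * written to GRBM_GFX_INDEX afterwards restores SE/SH/instance
 * broadcast mode)
 */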
3749 mutex_lock(&adev->grbm_idx_mutex);
3750 for (j = 0; j < 16; j++) {
3751 gfx_v9_0_select_se_sh(adev, 0x01, 0x0, j);
3752 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
3753 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3754 gfx_v9_0_select_se_sh(adev, 0x02, 0x0, j);
3755 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
3756 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3757 gfx_v9_0_select_se_sh(adev, 0x03, 0x0, j);
3758 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
3759 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3760 gfx_v9_0_select_se_sh(adev, 0x04, 0x0, j);
3761 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
3762 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3764 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
3765 mutex_unlock(&adev->grbm_idx_mutex);
3768 amdgpu_ib_free(adev, &ib, NULL);
3774 static int gfx_v9_0_early_init(void *handle)
3776 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3778 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
3779 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
3780 gfx_v9_0_set_ring_funcs(adev);
3781 gfx_v9_0_set_irq_funcs(adev);
3782 gfx_v9_0_set_gds_init(adev);
3783 gfx_v9_0_set_rlc_funcs(adev);
3788 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
3789 struct amdgpu_iv_entry *entry);
3791 static int gfx_v9_0_ecc_late_init(void *handle)
3793 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3794 struct ras_common_if **ras_if = &adev->gfx.ras_if;
3795 struct ras_ih_if ih_info = {
3796 .cb = gfx_v9_0_process_ras_data_cb,
3798 struct ras_fs_if fs_info = {
3799 .sysfs_name = "gfx_err_count",
3800 .debugfs_name = "gfx_err_inject",
3802 struct ras_common_if ras_block = {
3803 .block = AMDGPU_RAS_BLOCK__GFX,
3804 .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
3805 .sub_block_index = 0,
3810 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
3811 amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
3818 *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
3822 /* requires IBs so do in late init after IB pool is initialized */
3823 r = gfx_v9_0_do_edc_gpr_workarounds(adev);
3827 **ras_if = ras_block;
3829 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
3832 amdgpu_ras_request_reset_on_boot(adev,
3833 AMDGPU_RAS_BLOCK__GFX);
3839 ih_info.head = **ras_if;
3840 fs_info.head = **ras_if;
3842 r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
3846 r = amdgpu_ras_debugfs_create(adev, &fs_info);
3850 r = amdgpu_ras_sysfs_create(adev, &fs_info);
3854 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
3860 amdgpu_ras_sysfs_remove(adev, *ras_if);
3862 amdgpu_ras_debugfs_remove(adev, *ras_if);
3864 amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
3866 amdgpu_ras_feature_enable(adev, *ras_if, 0);
3873 static int gfx_v9_0_late_init(void *handle)
3875 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3878 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
3882 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
3886 r = gfx_v9_0_ecc_late_init(handle);
3893 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
3895 uint32_t rlc_setting;
3897 /* if RLC is not enabled, do nothing */
3898 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
3899 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
3905 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
3910 data = RLC_SAFE_MODE__CMD_MASK;
3911 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
3912 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3914 /* wait for RLC_SAFE_MODE */
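/* the CMD field auto-clears once the RLC acknowledges the safe-mode
 * entry message, so poll until it reads back as 0 */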
3915 for (i = 0; i < adev->usec_timeout; i++) {
3916 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
3922 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
3926 data = RLC_SAFE_MODE__CMD_MASK;
3927 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3930 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
3933 amdgpu_gfx_rlc_enter_safe_mode(adev);
3935 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
3936 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
3937 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
3938 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
3940 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
3941 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
3944 amdgpu_gfx_rlc_exit_safe_mode(adev);
3947 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
3950 /* TODO: double-check whether this needs to run under RLC safe mode */
3951 /* gfx_v9_0_enter_rlc_safe_mode(adev); */
3953 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
3954 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
3956 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
3958 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
3959 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
3961 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
3963 /* gfx_v9_0_exit_rlc_safe_mode(adev); */
3966 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
3971 amdgpu_gfx_rlc_enter_safe_mode(adev);
3973 /* It is disabled by HW by default */
3974 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
3975 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
3976 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
3978 if (adev->asic_type != CHIP_VEGA12)
3979 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
3981 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
3982 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
3983 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
3985 /* only for Vega10 & Raven1 */
3986 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
3989 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
3991 /* MGLS is a global flag to control all MGLS in GFX */
3992 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
3993 /* 2 - RLC memory Light sleep */
3994 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
3995 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
3996 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
3998 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4000 /* 3 - CP memory Light sleep */
4001 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4002 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4003 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4005 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4009 /* 1 - MGCG_OVERRIDE */
4010 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4012 if (adev->asic_type != CHIP_VEGA12)
4013 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4015 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4016 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4017 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4018 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4021 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4023 /* 2 - disable MGLS in RLC */
4024 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4025 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4026 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4027 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4030 /* 3 - disable MGLS in CP */
4031 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4032 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4033 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4034 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4038 amdgpu_gfx_rlc_exit_safe_mode(adev);
4041 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4046 amdgpu_gfx_rlc_enter_safe_mode(adev);
4048 /* Enable 3D CGCG/CGLS */
4049 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4050 /* write cmd to clear the cgcg/cgls override */
4051 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4052 /* unset CGCG override */
4053 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4054 /* update CGCG and CGLS override bits */
4056 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4058 /* enable 3D CGCG FSM (0x0000363f) */
4059 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4061 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4062 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4063 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4064 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4065 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4067 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
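/* with 3D CGLS enabled this is exactly the 0x0000363f noted above:
 * idle threshold 0x36 << 8, CGLS compensation delay 0xf << 2, plus
 * the CGCG_EN and CGLS_EN bits.
 */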
4069 /* set IDLE_POLL_COUNT(0x00900100) */
4070 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4071 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4072 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4074 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4076 /* Disable CGCG/CGLS */
4077 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4078 /* disable cgcg, cgls should be disabled */
4079 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4080 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4081 /* disable cgcg and cgls in FSM */
4083 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4086 amdgpu_gfx_rlc_exit_safe_mode(adev);
4089 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4094 amdgpu_gfx_rlc_enter_safe_mode(adev);
4096 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4097 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4098 /* unset CGCG override */
4099 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4100 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4101 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4103 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4104 /* update CGCG and CGLS override bits */
4106 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4108 /* enable cgcg FSM (0x0000363F) */
4109 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4111 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4112 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4113 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4114 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4115 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4117 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4119 /* set IDLE_POLL_COUNT(0x00900100) */
4120 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4121 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4122 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4124 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4126 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4127 /* reset CGCG/CGLS bits */
4128 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4129 /* disable cgcg and cgls in FSM */
4131 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4134 amdgpu_gfx_rlc_exit_safe_mode(adev);
4137 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4141 /* CGCG/CGLS should be enabled after MGCG/MGLS
4142 * === MGCG + MGLS ===
4144 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4145 /* === CGCG /CGLS for GFX 3D Only === */
4146 gfx_v9_0_update_3d_clock_gating(adev, enable);
4147 /* === CGCG + CGLS === */
4148 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4150 /* CGCG/CGLS should be disabled before MGCG/MGLS
4151 * === CGCG + CGLS ===
4153 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4154 /* === CGCG /CGLS for GFX 3D Only === */
4155 gfx_v9_0_update_3d_clock_gating(adev, enable);
4156 /* === MGCG + MGLS === */
4157 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4162 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4163 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4164 .set_safe_mode = gfx_v9_0_set_safe_mode,
4165 .unset_safe_mode = gfx_v9_0_unset_safe_mode,
4166 .init = gfx_v9_0_rlc_init,
4167 .get_csb_size = gfx_v9_0_get_csb_size,
4168 .get_csb_buffer = gfx_v9_0_get_csb_buffer,
4169 .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4170 .resume = gfx_v9_0_rlc_resume,
4171 .stop = gfx_v9_0_rlc_stop,
4172 .reset = gfx_v9_0_rlc_reset,
4173 .start = gfx_v9_0_rlc_start
4176 static int gfx_v9_0_set_powergating_state(void *handle,
4177 enum amd_powergating_state state)
4179 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4180 bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
4182 switch (adev->asic_type) {
4185 amdgpu_gfx_off_ctrl(adev, false);
4186 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4188 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4189 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4190 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4192 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4193 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4196 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4197 gfx_v9_0_enable_cp_power_gating(adev, true);
4199 gfx_v9_0_enable_cp_power_gating(adev, false);
4201 /* update gfx cgpg state */
4202 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4204 /* update mgcg state */
4205 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4208 amdgpu_gfx_off_ctrl(adev, true);
4212 amdgpu_gfx_off_ctrl(adev, false);
4213 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4215 amdgpu_gfx_off_ctrl(adev, true);
4225 static int gfx_v9_0_set_clockgating_state(void *handle,
4226 enum amd_clockgating_state state)
4228 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4230 if (amdgpu_sriov_vf(adev))
4233 switch (adev->asic_type) {
4238 gfx_v9_0_update_gfx_clock_gating(adev,
4239 state == AMD_CG_STATE_GATE ? true : false);
4247 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4249 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4252 if (amdgpu_sriov_vf(adev))
4255 /* AMD_CG_SUPPORT_GFX_MGCG */
4256 data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4257 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4258 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
4260 /* AMD_CG_SUPPORT_GFX_CGCG */
4261 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4262 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4263 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
4265 /* AMD_CG_SUPPORT_GFX_CGLS */
4266 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4267 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
4269 /* AMD_CG_SUPPORT_GFX_RLC_LS */
4270 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4271 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4272 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4274 /* AMD_CG_SUPPORT_GFX_CP_LS */
4275 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4276 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4277 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4279 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
4280 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4281 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4282 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4284 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
4285 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4286 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4289 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4291 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4294 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4296 struct amdgpu_device *adev = ring->adev;
4299 /* XXX check if swapping is necessary on BE */
4300 if (ring->use_doorbell) {
4301 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4303 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4304 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4310 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4312 struct amdgpu_device *adev = ring->adev;
4314 if (ring->use_doorbell) {
4315 /* XXX check if swapping is necessary on BE */
4316 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4317 WDOORBELL64(ring->doorbell_index, ring->wptr);
4319 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4320 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4324 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4326 struct amdgpu_device *adev = ring->adev;
4327 u32 ref_and_mask, reg_mem_engine;
4328 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
4330 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4333 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4336 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4343 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4344 reg_mem_engine = 1; /* pfp */
4347 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4348 adev->nbio_funcs->get_hdp_flush_req_offset(adev),
4349 adev->nbio_funcs->get_hdp_flush_done_offset(adev),
4350 ref_and_mask, ref_and_mask, 0x20);
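/* the WAIT_REG_MEM emitted above uses the write-then-wait operation:
 * ref_and_mask is written to the NBIO HDP flush request register, then
 * the flush done register is polled (every 0x20 clocks) until this
 * engine's bit reads back set.
 */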
4353 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4354 struct amdgpu_job *job,
4355 struct amdgpu_ib *ib,
4358 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4359 u32 header, control = 0;
4361 if (ib->flags & AMDGPU_IB_FLAG_CE)
4362 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4364 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4366 control |= ib->length_dw | (vmid << 24);
4368 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4369 control |= INDIRECT_BUFFER_PRE_ENB(1);
4371 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4372 gfx_v9_0_ring_emit_de_meta(ring);
4375 amdgpu_ring_write(ring, header);
4376 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4377 amdgpu_ring_write(ring,
4381 lower_32_bits(ib->gpu_addr));
4382 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4383 amdgpu_ring_write(ring, control);
4386 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4387 struct amdgpu_job *job,
4388 struct amdgpu_ib *ib,
4391 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4392 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4394 /* Currently there is a high likelihood of a wave ID mismatch
4395 * between ME and GDS, leading to a HW deadlock, because ME generates
4396 * different wave IDs than the GDS expects. This situation happens
4397 * randomly when at least 5 compute pipes use GDS ordered append.
4398 * The wave IDs generated by ME are also wrong after suspend/resume.
4399 * Those are probably bugs somewhere else in the kernel driver.
4401 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4402 * GDS to 0 for this ring (me/pipe).
4404 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4405 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4406 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4407 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4410 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4411 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4412 amdgpu_ring_write(ring,
4416 lower_32_bits(ib->gpu_addr));
4417 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4418 amdgpu_ring_write(ring, control);
4421 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4422 u64 seq, unsigned flags)
4424 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4425 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4426 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
4428 /* RELEASE_MEM - flush caches, send int */
4429 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
4430 amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
4431 EOP_TC_NC_ACTION_EN) :
4432 (EOP_TCL1_ACTION_EN |
4434 EOP_TC_WB_ACTION_EN |
4435 EOP_TC_MD_ACTION_EN)) |
4436 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4438 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
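/* DATA_SEL(2) sends the full 64bit seq value, DATA_SEL(1) only the low
 * 32 bits; INT_SEL(2) raises an interrupt once the write has confirmed.
 */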
4440 /*
4441 * the address should be Qword aligned for a 64bit write, and Dword
4442 * aligned when only the low 32 bits of data are sent (data high is discarded)
4443 */
4448 amdgpu_ring_write(ring, lower_32_bits(addr));
4449 amdgpu_ring_write(ring, upper_32_bits(addr));
4450 amdgpu_ring_write(ring, lower_32_bits(seq));
4451 amdgpu_ring_write(ring, upper_32_bits(seq));
4452 amdgpu_ring_write(ring, 0);
4455 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4457 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4458 uint32_t seq = ring->fence_drv.sync_seq;
4459 uint64_t addr = ring->fence_drv.gpu_addr;
4461 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
4462 lower_32_bits(addr), upper_32_bits(addr),
4463 seq, 0xffffffff, 4);
4466 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4467 unsigned vmid, uint64_t pd_addr)
4469 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
4471 /* compute doesn't have PFP */
4472 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
4473 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4474 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4475 amdgpu_ring_write(ring, 0x0);
4479 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4481 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
4484 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4488 /* XXX check if swapping is necessary on BE */
4489 if (ring->use_doorbell)
4490 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
4496 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
4499 struct amdgpu_device *adev = ring->adev;
4500 int pipe_num, tmp, reg;
4501 int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
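/* grant the pipe its full SPI_WCL_PIPE_PERCENT quota while it holds a
 * reservation, otherwise throttle it to the minimum value of 1 */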
4503 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
4505 /* first me only has 2 entries, GFX and HP3D */
4509 reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
4511 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
4515 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
4516 struct amdgpu_ring *ring,
4521 struct amdgpu_ring *iring;
4523 mutex_lock(&adev->gfx.pipe_reserve_mutex);
4524 pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
4526 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4528 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4530 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
4531 /* Clear all reservations - everyone reacquires all resources */
4532 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
4533 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
4536 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
4537 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
4540 /* Lower all pipes without a current reservation */
4541 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
4542 iring = &adev->gfx.gfx_ring[i];
4543 pipe = amdgpu_gfx_queue_to_bit(adev,
4547 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4548 gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4551 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
4552 iring = &adev->gfx.compute_ring[i];
4553 pipe = amdgpu_gfx_queue_to_bit(adev,
4557 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4558 gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4562 mutex_unlock(&adev->gfx.pipe_reserve_mutex);
4565 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
4566 struct amdgpu_ring *ring,
4569 uint32_t pipe_priority = acquire ? 0x2 : 0x0;
4570 uint32_t queue_priority = acquire ? 0xf : 0x0;
4572 mutex_lock(&adev->srbm_mutex);
4573 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4575 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
4576 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
4578 soc15_grbm_select(adev, 0, 0, 0, 0);
4579 mutex_unlock(&adev->srbm_mutex);
4582 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
4583 enum drm_sched_priority priority)
4585 struct amdgpu_device *adev = ring->adev;
4586 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
4588 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
4591 gfx_v9_0_hqd_set_priority(adev, ring, acquire);
4592 gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
4595 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4597 struct amdgpu_device *adev = ring->adev;
4599 /* XXX check if swapping is necessary on BE */
4600 if (ring->use_doorbell) {
4601 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4602 WDOORBELL64(ring->doorbell_index, ring->wptr);
4604 BUG(); /* only DOORBELL method supported on gfx9 now */
4608 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
4609 u64 seq, unsigned int flags)
4611 struct amdgpu_device *adev = ring->adev;
4613 /* we only allocate 32 bits of writeback space for each seq */
4614 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
4616 /* write fence seq to the "addr" */
4617 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4618 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4619 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
4620 amdgpu_ring_write(ring, lower_32_bits(addr));
4621 amdgpu_ring_write(ring, upper_32_bits(addr));
4622 amdgpu_ring_write(ring, lower_32_bits(seq));
4624 if (flags & AMDGPU_FENCE_FLAG_INT) {
4625 /* set register to trigger INT */
4626 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4627 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4628 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
4629 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
4630 amdgpu_ring_write(ring, 0);
4631 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
4635 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
4637 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4638 amdgpu_ring_write(ring, 0);
4641 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
4643 struct v9_ce_ib_state ce_payload = {0};
4647 cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
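/* the PM4 count field is (dwords following the header) - 1: three
 * setup dwords (control word plus destination address lo/hi) and the
 * payload itself, hence payload_dwords + 4 - 2.
 */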
4648 csa_addr = amdgpu_csa_vaddr(ring->adev);
4650 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4651 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
4652 WRITE_DATA_DST_SEL(8) |
4654 WRITE_DATA_CACHE_POLICY(0));
4655 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4656 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4657 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
4660 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
4662 struct v9_de_ib_state de_payload = {0};
4663 uint64_t csa_addr, gds_addr;
4666 csa_addr = amdgpu_csa_vaddr(ring->adev);
4667 gds_addr = csa_addr + 4096;
4668 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
4669 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
4671 cnt = (sizeof(de_payload) >> 2) + 4 - 2;
4672 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4673 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
4674 WRITE_DATA_DST_SEL(8) |
4676 WRITE_DATA_CACHE_POLICY(0));
4677 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4678 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4679 amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
4682 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
4684 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
4685 amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
4688 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
4692 if (amdgpu_sriov_vf(ring->adev))
4693 gfx_v9_0_ring_emit_ce_meta(ring);
4695 gfx_v9_0_ring_emit_tmz(ring, true);
4697 dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
4698 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
4699 /* set load_global_config & load_global_uconfig */
4701 /* set load_cs_sh_regs */
4703 /* set load_per_context_state & load_gfx_sh_regs for GFX */
4706 /* set load_ce_ram if a preamble is presented */
4707 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
4710 /* still load_ce_ram if the preamble is presented for the first time,
4711 * even though no context switch happens.
4712 */
4713 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
4717 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4718 amdgpu_ring_write(ring, dw2);
4719 amdgpu_ring_write(ring, 0);
4722 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
4725 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
4726 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
4727 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
4728 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
4729 ret = ring->wptr & ring->buf_mask;
4730 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
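/* the 0x55aa55aa dword just emitted is the COND_EXEC execution count;
 * its ring offset (ret) is returned so that
 * gfx_v9_0_ring_emit_patch_cond_exec() can patch in the real size of
 * the conditional block once it is known.
 */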
4734 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
4737 BUG_ON(offset > ring->buf_mask);
4738 BUG_ON(ring->ring[offset] != 0x55aa55aa);
4740 cur = (ring->wptr & ring->buf_mask) - 1;
4741 if (likely(cur > offset))
4742 ring->ring[offset] = cur - offset;
4744 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
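/* if the write pointer has wrapped past the patch point, the distance
 * is computed modulo the ring length in dwords (ring_size >> 2) */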
4747 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
4749 struct amdgpu_device *adev = ring->adev;
4751 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4752 amdgpu_ring_write(ring, 0 | /* src: register*/
4753 (5 << 8) | /* dst: memory */
4754 (1 << 20)); /* write confirm */
4755 amdgpu_ring_write(ring, reg);
4756 amdgpu_ring_write(ring, 0);
4757 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4758 adev->virt.reg_val_offs * 4));
4759 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4760 adev->virt.reg_val_offs * 4));
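/* COPY_DATA src 0 (register) -> dst 5 (memory) with write confirm: the
 * CP lands the register value in the writeback slot at
 * adev->virt.reg_val_offs, where the host-side register read helper
 * picks it up.
 */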
4763 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
4768 switch (ring->funcs->type) {
4769 case AMDGPU_RING_TYPE_GFX:
4770 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
4772 case AMDGPU_RING_TYPE_KIQ:
4773 cmd = (1 << 16); /* no inc addr */
4779 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4780 amdgpu_ring_write(ring, cmd);
4781 amdgpu_ring_write(ring, reg);
4782 amdgpu_ring_write(ring, 0);
4783 amdgpu_ring_write(ring, val);
4786 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
4787 uint32_t val, uint32_t mask)
4789 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
4792 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
4793 uint32_t reg0, uint32_t reg1,
4794 uint32_t ref, uint32_t mask)
4796 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4797 struct amdgpu_device *adev = ring->adev;
4798 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
4799 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
4802 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
4805 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
4809 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
4811 struct amdgpu_device *adev = ring->adev;
4814 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
4815 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
4816 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
4817 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
4818 WREG32(mmSQ_CMD, value);
4821 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4822 enum amdgpu_interrupt_state state)
4825 case AMDGPU_IRQ_STATE_DISABLE:
4826 case AMDGPU_IRQ_STATE_ENABLE:
4827 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4828 TIME_STAMP_INT_ENABLE,
4829 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4836 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4838 enum amdgpu_interrupt_state state)
4840 u32 mec_int_cntl, mec_int_cntl_reg;
4843 * amdgpu controls only the first MEC. That's why this function only
4844 * handles the setting of interrupts for this specific MEC. All other
4845 * pipes' interrupts are set by amdkfd.
4851 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
4854 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
4857 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
4860 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
4863 DRM_DEBUG("invalid pipe %d\n", pipe);
4867 DRM_DEBUG("invalid me %d\n", me);
4872 case AMDGPU_IRQ_STATE_DISABLE:
4873 mec_int_cntl = RREG32(mec_int_cntl_reg);
4874 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4875 TIME_STAMP_INT_ENABLE, 0);
4876 WREG32(mec_int_cntl_reg, mec_int_cntl);
4878 case AMDGPU_IRQ_STATE_ENABLE:
4879 mec_int_cntl = RREG32(mec_int_cntl_reg);
4880 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4881 TIME_STAMP_INT_ENABLE, 1);
4882 WREG32(mec_int_cntl_reg, mec_int_cntl);
4889 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4890 struct amdgpu_irq_src *source,
4892 enum amdgpu_interrupt_state state)
4895 case AMDGPU_IRQ_STATE_DISABLE:
4896 case AMDGPU_IRQ_STATE_ENABLE:
4897 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4898 PRIV_REG_INT_ENABLE,
4899 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4908 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4909 struct amdgpu_irq_src *source,
4911 enum amdgpu_interrupt_state state)
4914 case AMDGPU_IRQ_STATE_DISABLE:
4915 case AMDGPU_IRQ_STATE_ENABLE:
4916 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4917 PRIV_INSTR_INT_ENABLE,
4918 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4926 #define ENABLE_ECC_ON_ME_PIPE(me, pipe) \
4927 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
4928 CP_ECC_ERROR_INT_ENABLE, 1)
4930 #define DISABLE_ECC_ON_ME_PIPE(me, pipe) \
4931 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
4932 CP_ECC_ERROR_INT_ENABLE, 0)
4934 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
4935 struct amdgpu_irq_src *source,
4937 enum amdgpu_interrupt_state state)
4940 case AMDGPU_IRQ_STATE_DISABLE:
4941 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4942 CP_ECC_ERROR_INT_ENABLE, 0);
4943 DISABLE_ECC_ON_ME_PIPE(1, 0);
4944 DISABLE_ECC_ON_ME_PIPE(1, 1);
4945 DISABLE_ECC_ON_ME_PIPE(1, 2);
4946 DISABLE_ECC_ON_ME_PIPE(1, 3);
4949 case AMDGPU_IRQ_STATE_ENABLE:
4950 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4951 CP_ECC_ERROR_INT_ENABLE, 1);
4952 ENABLE_ECC_ON_ME_PIPE(1, 0);
4953 ENABLE_ECC_ON_ME_PIPE(1, 1);
4954 ENABLE_ECC_ON_ME_PIPE(1, 2);
4955 ENABLE_ECC_ON_ME_PIPE(1, 3);
4965 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
4966 struct amdgpu_irq_src *src,
4968 enum amdgpu_interrupt_state state)
4971 case AMDGPU_CP_IRQ_GFX_EOP:
4972 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
4974 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
4975 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
4977 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
4978 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
4980 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
4981 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
4983 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
4984 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
4986 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
4987 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
4989 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
4990 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
4992 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
4993 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
4995 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
4996 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5004 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5005 struct amdgpu_irq_src *source,
5006 struct amdgpu_iv_entry *entry)
5009 u8 me_id, pipe_id, queue_id;
5010 struct amdgpu_ring *ring;
5012 DRM_DEBUG("IH: CP EOP\n");
5013 me_id = (entry->ring_id & 0x0c) >> 2;
5014 pipe_id = (entry->ring_id & 0x03) >> 0;
5015 queue_id = (entry->ring_id & 0x70) >> 4;
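/* ring_id layout: bits [1:0] pipe, bits [3:2] me, bits [6:4] queue */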
5019 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5023 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5024 ring = &adev->gfx.compute_ring[i];
5025 /* Per-queue interrupt is supported for MEC starting from VI.
5026 * The interrupt can only be enabled/disabled per pipe instead of per queue.
5028 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5029 amdgpu_fence_process(ring);
5036 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5037 struct amdgpu_iv_entry *entry)
5039 u8 me_id, pipe_id, queue_id;
5040 struct amdgpu_ring *ring;
5043 me_id = (entry->ring_id & 0x0c) >> 2;
5044 pipe_id = (entry->ring_id & 0x03) >> 0;
5045 queue_id = (entry->ring_id & 0x70) >> 4;
5049 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5053 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5054 ring = &adev->gfx.compute_ring[i];
5055 if (ring->me == me_id && ring->pipe == pipe_id &&
5056 ring->queue == queue_id)
5057 drm_sched_fault(&ring->sched);
5063 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5064 struct amdgpu_irq_src *source,
5065 struct amdgpu_iv_entry *entry)
5067 DRM_ERROR("Illegal register access in command stream\n");
5068 gfx_v9_0_fault(adev, entry);
5072 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5073 struct amdgpu_irq_src *source,
5074 struct amdgpu_iv_entry *entry)
5076 DRM_ERROR("Illegal instruction in command stream\n");
5077 gfx_v9_0_fault(adev, entry);
5081 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
5082 struct amdgpu_iv_entry *entry)
5084 /* TODO: a UE (uncorrectable error) will trigger an interrupt. */
5085 kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
5086 amdgpu_ras_reset_gpu(adev, 0);
5087 return AMDGPU_RAS_UE;
5090 static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
5091 struct amdgpu_irq_src *source,
5092 struct amdgpu_iv_entry *entry)
5094 struct ras_common_if *ras_if = adev->gfx.ras_if;
5095 struct ras_dispatch_if ih_data = {
5102 ih_data.head = *ras_if;
5104 DRM_ERROR("CP ECC ERROR IRQ\n");
5105 amdgpu_ras_interrupt_dispatch(adev, &ih_data);
5109 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
5111 .early_init = gfx_v9_0_early_init,
5112 .late_init = gfx_v9_0_late_init,
5113 .sw_init = gfx_v9_0_sw_init,
5114 .sw_fini = gfx_v9_0_sw_fini,
5115 .hw_init = gfx_v9_0_hw_init,
5116 .hw_fini = gfx_v9_0_hw_fini,
5117 .suspend = gfx_v9_0_suspend,
5118 .resume = gfx_v9_0_resume,
5119 .is_idle = gfx_v9_0_is_idle,
5120 .wait_for_idle = gfx_v9_0_wait_for_idle,
5121 .soft_reset = gfx_v9_0_soft_reset,
5122 .set_clockgating_state = gfx_v9_0_set_clockgating_state,
5123 .set_powergating_state = gfx_v9_0_set_powergating_state,
5124 .get_clockgating_state = gfx_v9_0_get_clockgating_state,
5127 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
5128 .type = AMDGPU_RING_TYPE_GFX,
5130 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5131 .support_64bit_ptrs = true,
5132 .vmhub = AMDGPU_GFXHUB,
5133 .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
5134 .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
5135 .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
5136 .emit_frame_size = /* 242 dwords maximum in total if 16 IBs */
5138 7 + /* PIPELINE_SYNC */
5139 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5140 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5142 8 + /* FENCE for VM_FLUSH */
5143 20 + /* GDS switch */
5144 4 + /* double SWITCH_BUFFER,
5145 the first COND_EXEC jump to the place just
5146 prior to this double SWITCH_BUFFER */
5154 8 + 8 + /* FENCE x2 */
5155 2, /* SWITCH_BUFFER */
5156 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
5157 .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
5158 .emit_fence = gfx_v9_0_ring_emit_fence,
5159 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
5160 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
5161 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
5162 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
5163 .test_ring = gfx_v9_0_ring_test_ring,
5164 .test_ib = gfx_v9_0_ring_test_ib,
5165 .insert_nop = amdgpu_ring_insert_nop,
5166 .pad_ib = amdgpu_ring_generic_pad_ib,
5167 .emit_switch_buffer = gfx_v9_ring_emit_sb,
5168 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
5169 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
5170 .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
5171 .emit_tmz = gfx_v9_0_ring_emit_tmz,
5172 .emit_wreg = gfx_v9_0_ring_emit_wreg,
5173 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5174 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5175 .soft_recovery = gfx_v9_0_ring_soft_recovery,
5178 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
5179 .type = AMDGPU_RING_TYPE_COMPUTE,
5181 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5182 .support_64bit_ptrs = true,
5183 .vmhub = AMDGPU_GFXHUB,
5184 .get_rptr = gfx_v9_0_ring_get_rptr_compute,
5185 .get_wptr = gfx_v9_0_ring_get_wptr_compute,
5186 .set_wptr = gfx_v9_0_ring_set_wptr_compute,
5188 20 + /* gfx_v9_0_ring_emit_gds_switch */
5189 7 + /* gfx_v9_0_ring_emit_hdp_flush */
5190 5 + /* hdp invalidate */
5191 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
5192 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5193 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5194 2 + /* gfx_v9_0_ring_emit_vm_flush */
5195 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
5196 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
5197 .emit_ib = gfx_v9_0_ring_emit_ib_compute,
5198 .emit_fence = gfx_v9_0_ring_emit_fence,
5199 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
5200 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
5201 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
5202 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
5203 .test_ring = gfx_v9_0_ring_test_ring,
5204 .test_ib = gfx_v9_0_ring_test_ib,
5205 .insert_nop = amdgpu_ring_insert_nop,
5206 .pad_ib = amdgpu_ring_generic_pad_ib,
5207 .set_priority = gfx_v9_0_ring_set_priority_compute,
5208 .emit_wreg = gfx_v9_0_ring_emit_wreg,
5209 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5210 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5213 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
5214 .type = AMDGPU_RING_TYPE_KIQ,
5216 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5217 .support_64bit_ptrs = true,
5218 .vmhub = AMDGPU_GFXHUB,
5219 .get_rptr = gfx_v9_0_ring_get_rptr_compute,
5220 .get_wptr = gfx_v9_0_ring_get_wptr_compute,
5221 .set_wptr = gfx_v9_0_ring_set_wptr_compute,
5223 20 + /* gfx_v9_0_ring_emit_gds_switch */
5224 7 + /* gfx_v9_0_ring_emit_hdp_flush */
5225 5 + /* hdp invalidate */
5226 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
5227 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5228 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5229 2 + /* gfx_v9_0_ring_emit_vm_flush */
5230 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
5231 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
5232 .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
5233 .test_ring = gfx_v9_0_ring_test_ring,
5234 .insert_nop = amdgpu_ring_insert_nop,
5235 .pad_ib = amdgpu_ring_generic_pad_ib,
5236 .emit_rreg = gfx_v9_0_ring_emit_rreg,
5237 .emit_wreg = gfx_v9_0_ring_emit_wreg,
5238 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5239 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5242 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
5246 adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
5248 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5249 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
5251 for (i = 0; i < adev->gfx.num_compute_rings; i++)
5252 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
5255 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
5256 .set = gfx_v9_0_set_eop_interrupt_state,
5257 .process = gfx_v9_0_eop_irq,
5260 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
5261 .set = gfx_v9_0_set_priv_reg_fault_state,
5262 .process = gfx_v9_0_priv_reg_irq,
5265 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
5266 .set = gfx_v9_0_set_priv_inst_fault_state,
5267 .process = gfx_v9_0_priv_inst_irq,
5270 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
5271 .set = gfx_v9_0_set_cp_ecc_error_state,
5272 .process = gfx_v9_0_cp_ecc_error_irq,
5276 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
5278 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5279 adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
5281 adev->gfx.priv_reg_irq.num_types = 1;
5282 adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
5284 adev->gfx.priv_inst_irq.num_types = 1;
5285 adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
5287 adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
5288 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
5291 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
5293 switch (adev->asic_type) {
5298 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
5305 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
5307 /* init ASIC GDS info */
5308 switch (adev->asic_type) {
5312 adev->gds.mem.total_size = 0x10000;
5315 adev->gds.mem.total_size = 0x1000;
5318 adev->gds.mem.total_size = 0x10000;
5322 switch (adev->asic_type) {
5325 adev->gds.gds_compute_max_wave_id = 0x7ff;
5328 adev->gds.gds_compute_max_wave_id = 0x27f;
5331 if (adev->rev_id >= 0x8)
5332 adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
5334 adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
5337 /* this really depends on the chip */
5338 adev->gds.gds_compute_max_wave_id = 0x7ff;
5342 adev->gds.gws.total_size = 64;
5343 adev->gds.oa.total_size = 16;
5345 if (adev->gds.mem.total_size == 64 * 1024) {
5346 adev->gds.mem.gfx_partition_size = 4096;
5347 adev->gds.mem.cs_partition_size = 4096;
5349 adev->gds.gws.gfx_partition_size = 4;
5350 adev->gds.gws.cs_partition_size = 4;
5352 adev->gds.oa.gfx_partition_size = 4;
5353 adev->gds.oa.cs_partition_size = 1;
5355 adev->gds.mem.gfx_partition_size = 1024;
5356 adev->gds.mem.cs_partition_size = 1024;
5358 adev->gds.gws.gfx_partition_size = 16;
5359 adev->gds.gws.cs_partition_size = 16;
5361 adev->gds.oa.gfx_partition_size = 4;
5362 adev->gds.oa.cs_partition_size = 4;
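/* i.e. a 64KB GDS (Vega) is carved into 4KB gfx and 4KB CS partitions,
 * while the smaller 4KB GDS (Raven) uses 1KB partitions but larger
 * per-client GWS/OA shares.
 */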
5366 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
5374 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5375 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5377 WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
5380 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
5384 data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
5385 data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
5387 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5388 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
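/* active CUs = the complement of the (fused-off | user-disabled) bits,
 * clipped below to a max_cu_per_sh-wide mask */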
5390 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
5392 return (~data) & mask;
5395 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
5396 struct amdgpu_cu_info *cu_info)
5398 int i, j, k, counter, active_cu_number = 0;
5399 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5400 unsigned disable_masks[4 * 2];
5402 if (!adev || !cu_info)
5405 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
5407 mutex_lock(&adev->grbm_idx_mutex);
5408 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5409 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5413 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
5415 gfx_v9_0_set_user_cu_inactive_bitmap(
5416 adev, disable_masks[i * 2 + j]);
5417 bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
5418 cu_info->bitmap[i][j] = bitmap;
5420 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
5421 if (bitmap & mask) {
5422 if (counter < adev->gfx.config.max_cu_per_sh)
5428 active_cu_number += counter;
5430 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5431 cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
5434 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5435 mutex_unlock(&adev->grbm_idx_mutex);
5437 cu_info->number = active_cu_number;
5438 cu_info->ao_cu_mask = ao_cu_mask;
5439 cu_info->simd_per_cu = NUM_SIMD_PER_CU;
5444 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
5446 .type = AMD_IP_BLOCK_TYPE_GFX,
5450 .funcs = &gfx_v9_0_ip_funcs,