asedeno.scripts.mit.edu Git - linux.git/commitdiff
drm/amdgpu: implement querying ras error count for mmhub9.4
author Dennis Li <Dennis.Li@amd.com>
Tue, 19 Nov 2019 06:02:57 +0000 (14:02 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Fri, 22 Nov 2019 19:27:11 +0000 (14:27 -0500)
Get mmhub error counter by accessing EDC_CNT registers.

v2: Add mmhub_v9_4_ prefix for local static variable and function

Signed-off-by: Dennis Li <dennis.li@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h

index ee615d05083737f9e27b06bd1d5a926ee6d3245b..5f4a6cdf83a775c44cba0b5de03a878f3b5fdb83 100644 (file)
@@ -658,6 +658,9 @@ static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)
        case CHIP_VEGA20:
                adev->mmhub.funcs = &mmhub_v1_0_funcs;
                break;
+       case CHIP_ARCTURUS:
+               adev->mmhub.funcs = &mmhub_v9_4_funcs;
+               break;
        default:
                break;
        }
index 2c5adfe803a206717ea85c4a7e5c6cc63f0cbeb7..6fe5c39e5581e71c9734fb6484ed3bb3632940db 100644 (file)
@@ -21,6 +21,7 @@
  *
  */
 #include "amdgpu.h"
+#include "amdgpu_ras.h"
 #include "mmhub_v9_4.h"
 
 #include "mmhub/mmhub_9_4_1_offset.h"
@@ -29,7 +30,7 @@
 #include "athub/athub_1_0_offset.h"
 #include "athub/athub_1_0_sh_mask.h"
 #include "vega10_enum.h"
-
+#include "soc15.h"
 #include "soc15_common.h"
 
 #define MMHUB_NUM_INSTANCES                    2
@@ -651,3 +652,253 @@ void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u32 *flags)
        if (data & ATCL2_0_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)
                *flags |= AMD_CG_SUPPORT_MC_LS;
 }
+
+static const struct soc15_ras_field_entry mmhub_v9_4_ras_fields[] = { /* per sub-block: name, EDC_CNT register, SEC/SED count field, DED count field */
+       { "MMEA0_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+       SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+       SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+       },
+       { "MMEA0_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+       SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+       SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+       },
+       { "MMEA0_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+       SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+       SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+       },
+       { "MMEA0_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+       SOC15_REG_FIELD(MMEA0_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+       SOC15_REG_FIELD(MMEA0_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+       },
+       { "MMEA0_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+       SOC15_REG_FIELD(MMEA0_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+       SOC15_REG_FIELD(MMEA0_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+       },
+       { "MMEA0_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+       SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+       0, 0, /* presumably the empty DED slot; this sub-block's DED field is listed under MMEA0_EDC_CNT3 */
+       },
+       { "MMEA0_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+       SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+       0, 0,
+       },
+       { "MMEA0_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+       SOC15_REG_FIELD(MMEA0_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+       0, 0,
+       },
+       { "MMEA0_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+       SOC15_REG_FIELD(MMEA0_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+       0, 0,
+       },
+       { "MMEA0_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+       SOC15_REG_FIELD(MMEA0_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
+       0, 0,
+       },
+       { "MMEA0_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+       SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+       SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+       },
+       { "MMEA0_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+       SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+       SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+       },
+       { "MMEA0_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+       SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+       SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+       },
+       { "MMEA0_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+       SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+       0, 0,
+       },
+       { "MMEA0_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+       SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+       0, 0,
+       },
+       { "MMEA0_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3),
+       0, 0, /* presumably the empty SEC slot; this sub-block's SED field is listed under MMEA0_EDC_CNT */
+       SOC15_REG_FIELD(MMEA0_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+       },
+       { "MMEA0_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3),
+       0, 0,
+       SOC15_REG_FIELD(MMEA0_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+       },
+       { "MMEA0_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3),
+       0, 0,
+       SOC15_REG_FIELD(MMEA0_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+       },
+       { "MMEA0_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3),
+       0, 0,
+       SOC15_REG_FIELD(MMEA0_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+       },
+       { "MMEA0_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3),
+       0, 0,
+       SOC15_REG_FIELD(MMEA0_EDC_CNT3, IOWR_DATAMEM_DED_COUNT),
+       },
+       { "MMEA0_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3),
+       0, 0,
+       SOC15_REG_FIELD(MMEA0_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+       },
+       { "MMEA0_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3),
+       0, 0,
+       SOC15_REG_FIELD(MMEA0_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+       },
+       { "MMEA1_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+       SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+       SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+       },
+       { "MMEA1_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+       SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+       SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+       },
+       { "MMEA1_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+       SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+       SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+       },
+       { "MMEA1_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+       SOC15_REG_FIELD(MMEA1_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+       SOC15_REG_FIELD(MMEA1_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+       },
+       { "MMEA1_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+       SOC15_REG_FIELD(MMEA1_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+       SOC15_REG_FIELD(MMEA1_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+       },
+       { "MMEA1_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+       SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+       0, 0,
+       },
+       { "MMEA1_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+       SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+       0, 0,
+       },
+       { "MMEA1_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+       SOC15_REG_FIELD(MMEA1_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+       0, 0,
+       },
+       { "MMEA1_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+       SOC15_REG_FIELD(MMEA1_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+       0, 0,
+       },
+       { "MMEA1_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+       SOC15_REG_FIELD(MMEA1_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
+       0, 0,
+       },
+       { "MMEA1_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+       SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+       SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+       },
+       { "MMEA1_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+       SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+       SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+       },
+       { "MMEA1_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+       SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+       SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+       },
+       { "MMEA1_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+       SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+       0, 0,
+       },
+       { "MMEA1_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+       SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+       0, 0,
+       },
+       { "MMEA1_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3),
+       0, 0,
+       SOC15_REG_FIELD(MMEA1_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+       },
+       { "MMEA1_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3),
+       0, 0,
+       SOC15_REG_FIELD(MMEA1_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+       },
+       { "MMEA1_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3),
+       0, 0,
+       SOC15_REG_FIELD(MMEA1_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+       },
+       { "MMEA1_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3),
+       0, 0,
+       SOC15_REG_FIELD(MMEA1_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+       },
+       { "MMEA1_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3),
+       0, 0,
+       SOC15_REG_FIELD(MMEA1_EDC_CNT3, IOWR_DATAMEM_DED_COUNT),
+       },
+       { "MMEA1_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3),
+       0, 0,
+       SOC15_REG_FIELD(MMEA1_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+       },
+       { "MMEA1_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3),
+       0, 0,
+       SOC15_REG_FIELD(MMEA1_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+       }
+};
+
+/* EDC counter registers read by mmhub_v9_4_query_ras_error_count(). */
+static const struct soc15_reg_entry mmhub_v9_4_edc_cnt_regs[] = {
+       { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT), 0, 0, 0 },
+       { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2), 0, 0, 0 },
+       { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3), 0, 0, 0 },
+       { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT), 0, 0, 0 },
+       { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2), 0, 0, 0 },
+       { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3), 0, 0, 0 },
+};
+
+/*
+ * Decode one EDC_CNT register value: for each mmhub_v9_4_ras_fields entry whose
+ * reg_offset matches @reg, extract the SEC and DED counts from @value, log the
+ * non-zero ones and add them to *@sec_count / *@ded_count.  Always returns 0.
+ */
+static int mmhub_v9_4_get_ras_error_count(const struct soc15_reg_entry *reg,
+       uint32_t value, uint32_t *sec_count, uint32_t *ded_count)
+{
+       const struct soc15_ras_field_entry *f;
+       uint32_t i, sec_cnt, ded_cnt;
+
+       for (i = 0; i < ARRAY_SIZE(mmhub_v9_4_ras_fields); i++) {
+               f = &mmhub_v9_4_ras_fields[i];
+               if (f->reg_offset != reg->reg_offset)
+                       continue;
+
+               sec_cnt = (value & f->sec_count_mask) >> f->sec_count_shift;
+               if (sec_cnt) {
+                       DRM_INFO("MMHUB SubBlock %s, SEC %u\n",
+                               f->name, sec_cnt);
+                       *sec_count += sec_cnt;
+               }
+
+               ded_cnt = (value & f->ded_count_mask) >> f->ded_count_shift;
+               if (ded_cnt) {
+                       DRM_INFO("MMHUB SubBlock %s, DED %u\n",
+                               f->name, ded_cnt);
+                       *ded_count += ded_cnt;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Report the summed MMHUB EDC counts: SEC as ce_count, DED as ue_count.
+ */
+static void mmhub_v9_4_query_ras_error_count(struct amdgpu_device *adev,
+                                          void *ras_error_status)
+{
+       struct ras_err_data *err_data = ras_error_status;
+       uint32_t idx, val, ce = 0, ue = 0;
+
+       err_data->ue_count = 0;
+       err_data->ce_count = 0;
+
+       for (idx = 0; idx < ARRAY_SIZE(mmhub_v9_4_edc_cnt_regs); idx++) {
+               val = RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v9_4_edc_cnt_regs[idx]));
+               if (val)
+                       mmhub_v9_4_get_ras_error_count(&mmhub_v9_4_edc_cnt_regs[idx],
+                               val, &ce, &ue);
+       }
+
+       err_data->ce_count += ce;
+       err_data->ue_count += ue;
+}
+
+const struct amdgpu_mmhub_funcs mmhub_v9_4_funcs = { /* installed for CHIP_ARCTURUS by gmc_v9_0_set_mmhub_funcs() */
+       .ras_late_init = amdgpu_mmhub_ras_late_init,
+       .query_ras_error_count = mmhub_v9_4_query_ras_error_count,
+};
\ No newline at end of file
index d435cfcec1a8038d2bff1aaa030d206a76cc2e30..354a4b7e875bec3815e35e64e74b12a3cb9d5b27 100644 (file)
@@ -23,6 +23,8 @@
 #ifndef __MMHUB_V9_4_H__
 #define __MMHUB_V9_4_H__
 
+extern const struct amdgpu_mmhub_funcs mmhub_v9_4_funcs;
+
 u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev);
 int mmhub_v9_4_gart_enable(struct amdgpu_device *adev);
 void mmhub_v9_4_gart_disable(struct amdgpu_device *adev);