asedeno.scripts.mit.edu Git - linux.git/commitdiff
drm/amdgpu: added support to get mGPU DRAM base
author John Clements <john.clements@amd.com>
Fri, 17 Jan 2020 04:18:00 +0000 (12:18 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Wed, 22 Jan 2020 21:34:07 +0000 (16:34 -0500)
resolves issue with RAS error injection in mGPU configuration

Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: John Clements <john.clements@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/df_v3_6.c
drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h
drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h

index 61a26c15c8dd50398b4dc0909372863b15fc1782..057f6ea645d7eb12384007a96365002ef5e0aa17 100644 (file)
@@ -52,6 +52,9 @@ struct amdgpu_df_funcs {
        uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val);
        void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val,
                         uint32_t ficadl_val, uint32_t ficadh_val);
+       uint64_t (*get_dram_base_addr)(struct amdgpu_device *adev,
+                                      uint32_t df_inst);
+       uint32_t (*get_df_inst_id)(struct amdgpu_device *adev);
 };
 
 struct amdgpu_df {
index 766be7f182825e6ff5c8e4885a82d5f3b1c6eba4..cef94e2169fe4532cf37ff91f975eca14845369c 100644 (file)
@@ -742,6 +742,20 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev,
        return 0;
 }
 
+uint64_t get_xgmi_relative_phy_addr(struct amdgpu_device *adev, uint64_t addr)
+{
+       uint32_t df_inst_id;
+
+       if ((!adev->df.funcs)                 ||
+           (!adev->df.funcs->get_df_inst_id) ||
+           (!adev->df.funcs->get_dram_base_addr))
+               return addr;
+
+       df_inst_id = adev->df.funcs->get_df_inst_id(adev);
+
+       return addr + adev->df.funcs->get_dram_base_addr(adev, df_inst_id);
+}
+
 /* wrapper of psp_ras_trigger_error */
 int amdgpu_ras_error_inject(struct amdgpu_device *adev,
                struct ras_inject_if *info)
@@ -759,6 +773,12 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
        if (!obj)
                return -EINVAL;
 
+       /* Calculate XGMI relative offset */
+       if (adev->gmc.xgmi.num_physical_nodes > 1) {
+               block_info.address = get_xgmi_relative_phy_addr(adev,
+                                                               block_info.address);
+       }
+
        switch (info->head.block) {
        case AMDGPU_RAS_BLOCK__GFX:
                if (adev->gfx.funcs->ras_error_inject)
index f51326598a8c05bed36776522e07eea3288846ad..5a1bd8ed1a6c51ab34d0d65f834c3d5a8ccd666a 100644 (file)
@@ -27,6 +27,9 @@
 #include "df/df_3_6_offset.h"
 #include "df/df_3_6_sh_mask.h"
 
+#define DF_3_6_SMN_REG_INST_DIST        0x8
+#define DF_3_6_INST_CNT                 8
+
 static u32 df_v3_6_channel_number[] = {1, 2, 0, 4, 0, 8, 0,
                                       16, 32, 0, 0, 0, 2, 4, 8};
 
@@ -683,6 +686,58 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,
        }
 }
 
+static uint64_t df_v3_6_get_dram_base_addr(struct amdgpu_device *adev,
+                                          uint32_t df_inst)
+{
+       uint32_t base_addr_reg_val      = 0;
+       uint64_t base_addr              = 0;
+
+       base_addr_reg_val = RREG32_PCIE(smnDF_CS_UMC_AON0_DramBaseAddress0 +
+                                       df_inst * DF_3_6_SMN_REG_INST_DIST);
+
+       if (REG_GET_FIELD(base_addr_reg_val,
+                         DF_CS_UMC_AON0_DramBaseAddress0,
+                         AddrRngVal) == 0) {
+               DRM_WARN("address range not valid");
+               return 0;
+       }
+
+       base_addr = REG_GET_FIELD(base_addr_reg_val,
+                                 DF_CS_UMC_AON0_DramBaseAddress0,
+                                 DramBaseAddr);
+
+       return base_addr << 28;
+}
+
+static uint32_t df_v3_6_get_df_inst_id(struct amdgpu_device *adev)
+{
+       uint32_t xgmi_node_id   = 0;
+       uint32_t df_inst_id     = 0;
+
+       /* Walk through DF dst nodes to find current XGMI node */
+       for (df_inst_id = 0; df_inst_id < DF_3_6_INST_CNT; df_inst_id++) {
+
+               xgmi_node_id = RREG32_PCIE(smnDF_CS_UMC_AON0_DramLimitAddress0 +
+                                          df_inst_id * DF_3_6_SMN_REG_INST_DIST);
+               xgmi_node_id = REG_GET_FIELD(xgmi_node_id,
+                                            DF_CS_UMC_AON0_DramLimitAddress0,
+                                            DstFabricID);
+
+               /* TODO: establish reason dest fabric id is offset by 7 */
+               xgmi_node_id = xgmi_node_id >> 7;
+
+               if (adev->gmc.xgmi.physical_node_id == xgmi_node_id)
+                       break;
+       }
+
+       if (df_inst_id == DF_3_6_INST_CNT) {
+               DRM_WARN("cant match df dst id with gpu node");
+               return 0;
+       }
+
+       return df_inst_id;
+}
+
 const struct amdgpu_df_funcs df_v3_6_funcs = {
        .sw_init = df_v3_6_sw_init,
        .sw_fini = df_v3_6_sw_fini,
@@ -696,5 +751,7 @@ const struct amdgpu_df_funcs df_v3_6_funcs = {
        .pmc_stop = df_v3_6_pmc_stop,
        .pmc_get_count = df_v3_6_pmc_get_count,
        .get_fica = df_v3_6_get_fica,
-       .set_fica = df_v3_6_set_fica
+       .set_fica = df_v3_6_set_fica,
+       .get_dram_base_addr = df_v3_6_get_dram_base_addr,
+       .get_df_inst_id = df_v3_6_get_df_inst_id
 };
index 87c84691b5be9ce79b2f27b17fc3cf382677f9f8..bb2c9c7a18dffec143ab7c37fc61d2b20bada8ee 100644 (file)
@@ -71,4 +71,7 @@
 #define smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3                0x1d098UL
 #define smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3                0x1d09cUL
 
+#define smnDF_CS_UMC_AON0_DramBaseAddress0     0x1c110UL
+#define smnDF_CS_UMC_AON0_DramLimitAddress0    0x1c114UL
+
 #endif
index 65e9f756e86e05e2bd236a8f69224a82850badb2..7afa87c7ff5437d875182a552b9e1a928a38bd92 100644 (file)
 #define DF_CS_UMC_AON0_DramBaseAddress0__IntLvAddrSel_MASK                                             0x00000E00L
 #define DF_CS_UMC_AON0_DramBaseAddress0__DramBaseAddr_MASK                                             0xFFFFF000L
 
+//DF_CS_UMC_AON0_DramLimitAddress0
+#define DF_CS_UMC_AON0_DramLimitAddress0__DstFabricID__SHIFT                                                  0x0
+#define DF_CS_UMC_AON0_DramLimitAddress0__AllowReqIO__SHIFT                                                   0xa
+#define DF_CS_UMC_AON0_DramLimitAddress0__DramLimitAddr__SHIFT                                                0xc
+#define DF_CS_UMC_AON0_DramLimitAddress0__DstFabricID_MASK                                                    0x000003FFL
+#define DF_CS_UMC_AON0_DramLimitAddress0__AllowReqIO_MASK                                                     0x00000400L
+#define DF_CS_UMC_AON0_DramLimitAddress0__DramLimitAddr_MASK                                                  0xFFFFF000L
+
 #endif