drm/amdgpu: fix build error without CONFIG_HSA_AMD

[linux.git] / drivers / gpu / drm / amd / amdgpu / amdgpu_device.c
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index 682833f90fddfc1fad1d9658ce4e61dad99ae123..b893ec935b841c290ba73a6a82ed5b4123cb92bc 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -65,19 +65,22 @@
  #include "amdgpu_ras.h"
  #include "amdgpu_pmu.h"
  
+#include <linux/suspend.h>
+
  MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
  MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
  MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
  MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
  MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
  MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
+MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
  MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
  MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
  MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
  
  #define AMDGPU_RESUME_MS               2000
  
-static const char *amdgpu_asic_name[] = {
+const char *amdgpu_asic_name[] = {
         "TAHITI",
         "PITCAIRN",
         "VERDE",
@@ -102,6 +105,7 @@ static const char *amdgpu_asic_name[] = {
         "VEGA20",
         "RAVEN",
         "ARCTURUS",
+       "RENOIR",
         "NAVI10",
         "NAVI14",
         "NAVI12",
@@ -1427,6 +1431,9 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
         case CHIP_ARCTURUS:
                 chip_name = "arcturus";
                 break;
+       case CHIP_RENOIR:
+               chip_name = "renoir";
+               break;
         case CHIP_NAVI10:
                 chip_name = "navi10";
                 break;
@@ -1579,7 +1586,9 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
         case CHIP_VEGA20:
         case CHIP_RAVEN:
         case CHIP_ARCTURUS:
-               if (adev->asic_type == CHIP_RAVEN)
+       case CHIP_RENOIR:
+               if (adev->asic_type == CHIP_RAVEN ||
+                   adev->asic_type == CHIP_RENOIR)
                         adev->family = AMDGPU_FAMILY_RV;
                 else
                         adev->family = AMDGPU_FAMILY_AI;
@@ -1615,7 +1624,11 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
         }
  
         adev->pm.pp_feature = amdgpu_pp_feature_mask;
-       if (amdgpu_sriov_vf(adev))
+       if (amdgpu_sriov_vf(adev)
+           #ifdef CONFIG_HSA_AMD
+           || sched_policy == KFD_SCHED_POLICY_NO_HWS
+           #endif
+           )
                 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
  
         for (i = 0; i < adev->num_ip_blocks; i++) {
@@ -2511,6 +2524,9 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
         case CHIP_NAVI10:
         case CHIP_NAVI14:
         case CHIP_NAVI12:
+#endif
+#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
+       case CHIP_RENOIR:
  #endif
                 return amdgpu_dc != 0;
  #endif
@@ -2573,7 +2589,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
         adev->ddev = ddev;
         adev->pdev = pdev;
         adev->flags = flags;
-       adev->asic_type = flags & AMD_ASIC_MASK;
+
+       if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
+               adev->asic_type = amdgpu_force_asic_type;
+       else
+               adev->asic_type = flags & AMD_ASIC_MASK;
+
         adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
         if (amdgpu_emu_mode == 1)
                 adev->usec_timeout *= 2;
@@ -3476,7 +3497,7 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
         amdgpu_virt_init_data_exchange(adev);
         amdgpu_virt_release_full_gpu(adev, true);
         if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
-               atomic_inc(&adev->vram_lost_counter);
+               amdgpu_inc_vram_lost(adev);
                 r = amdgpu_device_recover_vram(adev);
         }
  
@@ -3518,6 +3539,7 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
                 case CHIP_VEGA20:
                 case CHIP_VEGA10:
                 case CHIP_VEGA12:
+               case CHIP_RAVEN:
                         break;
                 default:
                         goto disabled;
@@ -3617,11 +3639,6 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
                                                 break;
                                 }
                         }
-
-                       list_for_each_entry(tmp_adev, device_list_handle,
-                                       gmc.xgmi.head) {
-                               amdgpu_ras_reserve_bad_pages(tmp_adev);
-                       }
                 }
         }
  
@@ -3641,7 +3658,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
                                 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
                                 if (vram_lost) {
                                         DRM_INFO("VRAM is lost due to GPU reset!\n");
-                                       atomic_inc(&tmp_adev->vram_lost_counter);
+                                       amdgpu_inc_vram_lost(tmp_adev);
                                 }
  
                                 r = amdgpu_gtt_mgr_recover(
@@ -3725,25 +3742,18 @@ static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
                 adev->mp1_state = PP_MP1_STATE_NONE;
                 break;
         }
-       /* Block kfd: SRIOV would do it separately */
-       if (!amdgpu_sriov_vf(adev))
-                amdgpu_amdkfd_pre_reset(adev);
  
         return true;
  }
  
  static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
  {
-       /*unlock kfd: SRIOV would do it separately */
-       if (!amdgpu_sriov_vf(adev))
-                amdgpu_amdkfd_post_reset(adev);
         amdgpu_vf_error_trans_all(adev);
         adev->mp1_state = PP_MP1_STATE_NONE;
         adev->in_gpu_reset = 0;
         mutex_unlock(&adev->lock_reset);
  }
  
-
  /**
   * amdgpu_device_gpu_recover - reset the asic and recover scheduler
   *
@@ -3763,11 +3773,24 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
         struct amdgpu_hive_info *hive = NULL;
         struct amdgpu_device *tmp_adev = NULL;
         int i, r = 0;
+       bool in_ras_intr = amdgpu_ras_intr_triggered();
+
+       /*
+        * Flush RAM to disk so that after reboot
+        * the user can read the log and see why the system rebooted.
+        */
+       if (in_ras_intr && amdgpu_ras_get_context(adev)->reboot) {
+
+               DRM_WARN("Emergency reboot.");
+
+               ksys_sync_helper();
+               emergency_restart();
+       }
  
         need_full_reset = job_signaled = false;
         INIT_LIST_HEAD(&device_list);
  
-       dev_info(adev->dev, "GPU reset begin!\n");
+       dev_info(adev->dev, "GPU %s begin!\n", in_ras_intr ? "jobs stop":"reset");
  
         cancel_delayed_work_sync(&adev->delayed_init_work);
  
@@ -3783,20 +3806,27 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
  
         if (hive && !mutex_trylock(&hive->reset_lock)) {
                 DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
-                        job->base.id, hive->hive_id);
+                         job ? job->base.id : -1, hive->hive_id);
                 return 0;
         }
  
         /* Start with adev pre asic reset first for soft reset check.*/
         if (!amdgpu_device_lock_adev(adev, !hive)) {
                 DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
-                                        job->base.id);
+                         job ? job->base.id : -1);
                 return 0;
         }
  
+       /* Block kfd: SRIOV would do it separately */
+       if (!amdgpu_sriov_vf(adev))
+                amdgpu_amdkfd_pre_reset(adev);
+
         /* Build list of devices to reset */
         if  (adev->gmc.xgmi.num_physical_nodes > 1) {
                 if (!hive) {
+                       /* unlock kfd: SRIOV would do it separately */
+                       if (!amdgpu_sriov_vf(adev))
+                               amdgpu_amdkfd_post_reset(adev);
                         amdgpu_device_unlock_adev(adev);
                         return -ENODEV;
                 }
@@ -3812,17 +3842,22 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
                 device_list_handle = &device_list;
         }
  
-       /*
-        * Mark these ASICs to be reseted as untracked first
-        * And add them back after reset completed
-        */
-       list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head)
-               amdgpu_unregister_gpu_instance(tmp_adev);
-
         /* block all schedulers and reset given job's ring */
         list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
+               if (tmp_adev != adev) {
+                       amdgpu_device_lock_adev(tmp_adev, false);
+                       if (!amdgpu_sriov_vf(tmp_adev))
+                                       amdgpu_amdkfd_pre_reset(tmp_adev);
+               }
+
+               /*
+                * Mark these ASICs to be reset as untracked first,
+                * and add them back after the reset has completed.
+                */
+               amdgpu_unregister_gpu_instance(tmp_adev);
+
                 /* disable ras on ALL IPs */
-               if (amdgpu_device_ip_need_full_reset(tmp_adev))
+               if (!in_ras_intr && amdgpu_device_ip_need_full_reset(tmp_adev))
                         amdgpu_ras_suspend(tmp_adev);
  
                 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
@@ -3831,11 +3866,17 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
                         if (!ring || !ring->sched.thread)
                                 continue;
  
-                       drm_sched_stop(&ring->sched, &job->base);
+                       drm_sched_stop(&ring->sched, job ? &job->base : NULL);
+
+                       if (in_ras_intr)
+                               amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
                 }
         }
  
  
+       if (in_ras_intr)
+               goto skip_sched_resume;
+
         /*
          * Must check guilty signal here since after this point all old
          * HW fences are force signaled.
@@ -3846,9 +3887,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
             dma_fence_is_signaled(job->base.s_fence->parent))
                 job_signaled = true;
  
-       if (!amdgpu_device_ip_need_full_reset(adev))
-               device_list_handle = &device_list;
-
         if (job_signaled) {
                 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
                 goto skip_hw_reset;
@@ -3856,9 +3894,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
  
  
         /* Guilty job will be freed after this*/
-       r = amdgpu_device_pre_asic_reset(adev,
-                                        job,
-                                        &need_full_reset);
+       r = amdgpu_device_pre_asic_reset(adev, job, &need_full_reset);
         if (r) {
                 /*TODO Should we stop ?*/
                 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
@@ -3872,7 +3908,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
                 if (tmp_adev == adev)
                         continue;
  
-               amdgpu_device_lock_adev(tmp_adev, false);
                 r = amdgpu_device_pre_asic_reset(tmp_adev,
                                                  NULL,
                                                  &need_full_reset);
@@ -3900,6 +3935,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
  
         /* Post ASIC reset for all devs .*/
         list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
+
                 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
                         struct amdgpu_ring *ring = tmp_adev->rings[i];
  
@@ -3921,12 +3957,18 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
  
                 if (r) {
                         /* bad news, how to tell it to userspace ? */
-                       dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter));
+                       dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
                         amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
                 } else {
-                       dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&adev->gpu_reset_counter));
+                       dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
                 }
+       }
  
+skip_sched_resume:
+       list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
+               /* unlock kfd: SRIOV would do it separately */
+               if (!in_ras_intr && !amdgpu_sriov_vf(tmp_adev))
+                       amdgpu_amdkfd_post_reset(tmp_adev);
                 amdgpu_device_unlock_adev(tmp_adev);
         }