]> asedeno.scripts.mit.edu Git - linux.git/commitdiff
habanalabs: perform accounting for active CS
authorOded Gabbay <oded.gabbay@gmail.com>
Sun, 3 Mar 2019 13:13:15 +0000 (15:13 +0200)
committerOded Gabbay <oded.gabbay@gmail.com>
Sun, 3 Mar 2019 13:13:15 +0000 (15:13 +0200)
This patch adds accounting for active CS. Active means that the CS was
submitted to the H/W queues and was not completed yet.

This is necessary to support suspend operation. Because the device will be
reset upon suspend, we can only suspend after all active CS have been
completed. Hence, we need to perform accounting on their number.

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
drivers/misc/habanalabs/command_submission.c
drivers/misc/habanalabs/device.c
drivers/misc/habanalabs/habanalabs.h
drivers/misc/habanalabs/hw_queue.c

index 3525236ed8d9d702e25fac066926ba1933fe4edc..19c84214a7ea8890543ea8341033ed1ceb89df12 100644 (file)
@@ -179,6 +179,12 @@ static void cs_do_release(struct kref *ref)
 
        /* We also need to update CI for internal queues */
        if (cs->submitted) {
+               int cs_cnt = atomic_dec_return(&hdev->cs_active_cnt);
+
+               WARN_ONCE((cs_cnt < 0),
+                       "hl%d: error in CS active cnt %d\n",
+                       hdev->id, cs_cnt);
+
                hl_int_hw_queue_update_ci(cs);
 
                spin_lock(&hdev->hw_queues_mirror_lock);
index 93d67983ddbae8a3889b52d16660ce78d20399f3..470d8005b50e56c0f9d494fd76e1d3514dbfb05f 100644 (file)
@@ -218,6 +218,7 @@ static int device_early_init(struct hl_device *hdev)
        spin_lock_init(&hdev->hw_queues_mirror_lock);
        atomic_set(&hdev->in_reset, 0);
        atomic_set(&hdev->fd_open_cnt, 0);
+       atomic_set(&hdev->cs_active_cnt, 0);
 
        return 0;
 
index 806f0a5ee4d887b16c5c1c4630acd0d418834517..a8ee52c880cd800651681b866048126b2e9fc478 100644 (file)
@@ -1056,13 +1056,15 @@ struct hl_device_reset_work {
  * @cb_pool_lock: protects the CB pool.
  * @user_ctx: current user context executing.
  * @dram_used_mem: current DRAM memory consumption.
- * @in_reset: is device in reset flow.
- * @curr_pll_profile: current PLL profile.
- * @fd_open_cnt: number of open user processes.
  * @timeout_jiffies: device CS timeout value.
  * @max_power: the max power of the device, as configured by the sysadmin. This
  *             value is saved so in case of hard-reset, KMD will restore this
  *             value and update the F/W after the re-initialization
+ * @in_reset: is device in reset flow.
+ * @curr_pll_profile: current PLL profile.
+ * @fd_open_cnt: number of open user processes.
+ * @cs_active_cnt: number of active command submissions on this device (active
+ *                 means already in H/W queues)
  * @major: habanalabs KMD major.
  * @high_pll: high PLL profile frequency.
  * @soft_reset_cnt: number of soft reset since KMD loading.
@@ -1128,11 +1130,12 @@ struct hl_device {
        struct hl_ctx                   *user_ctx;
 
        atomic64_t                      dram_used_mem;
+       u64                             timeout_jiffies;
+       u64                             max_power;
        atomic_t                        in_reset;
        atomic_t                        curr_pll_profile;
        atomic_t                        fd_open_cnt;
-       u64                             timeout_jiffies;
-       u64                             max_power;
+       atomic_t                        cs_active_cnt;
        u32                             major;
        u32                             high_pll;
        u32                             soft_reset_cnt;
index 67bece26417cbe930fa018abdb33c88ba8618b23..ef3bb695136025971c76b916a97dde8a4b36905b 100644 (file)
@@ -370,12 +370,13 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
                spin_unlock(&hdev->hw_queues_mirror_lock);
        }
 
-       list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) {
+       atomic_inc(&hdev->cs_active_cnt);
+
+       list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
                if (job->ext_queue)
                        ext_hw_queue_schedule_job(job);
                else
                        int_hw_queue_schedule_job(job);
-       }
 
        cs->submitted = true;