drivers/misc/habanalabs/device.c

   1 // SPDX-License-Identifier: GPL-2.0
   2
   3 /*
   4  * Copyright 2016-2019 HabanaLabs, Ltd.
   5  * All Rights Reserved.
   6  */
   7
   8 #define pr_fmt(fmt)                     "habanalabs: " fmt
   9
  10 #include "habanalabs.h"
  11
  12 #include <linux/pci.h>
  13 #include <linux/sched/signal.h>
  14 #include <linux/hwmon.h>
  15 #include <uapi/misc/habanalabs.h>
  16
  17 #define HL_PLDM_PENDING_RESET_PER_SEC   (HL_PENDING_RESET_PER_SEC * 10)
  18
  19 bool hl_device_disabled_or_in_reset(struct hl_device *hdev)
  20 {
  21         if ((hdev->disabled) || (atomic_read(&hdev->in_reset)))
  22                 return true;
  23         else
  24                 return false;
  25 }
  26
  27 enum hl_device_status hl_device_status(struct hl_device *hdev)
  28 {
  29         enum hl_device_status status;
  30
  31         if (hdev->disabled)
  32                 status = HL_DEVICE_STATUS_MALFUNCTION;
  33         else if (atomic_read(&hdev->in_reset))
  34                 status = HL_DEVICE_STATUS_IN_RESET;
  35         else
  36                 status = HL_DEVICE_STATUS_OPERATIONAL;
  37
  38         return status;
  39 };
  40
  41 static void hpriv_release(struct kref *ref)
  42 {
  43         struct hl_fpriv *hpriv;
  44         struct hl_device *hdev;
  45
  46         hpriv = container_of(ref, struct hl_fpriv, refcount);
  47
  48         hdev = hpriv->hdev;
  49
  50         put_pid(hpriv->taskpid);
  51
  52         hl_debugfs_remove_file(hpriv);
  53
  54         mutex_destroy(&hpriv->restore_phase_mutex);
  55
  56         kfree(hpriv);
  57
  58         /* Now the FD is really closed */
  59         atomic_dec(&hdev->fd_open_cnt);
  60
  61         /* This allows a new user context to open the device */
  62         hdev->user_ctx = NULL;
  63 }
  64
  65 void hl_hpriv_get(struct hl_fpriv *hpriv)
  66 {
  67         kref_get(&hpriv->refcount);
  68 }
  69
  70 void hl_hpriv_put(struct hl_fpriv *hpriv)
  71 {
  72         kref_put(&hpriv->refcount, hpriv_release);
  73 }
  74
  75 /*
  76  * hl_device_release - release function for habanalabs device
  77  *
  78  * @inode: pointer to inode structure
  79  * @filp: pointer to file structure
  80  *
  81  * Called when process closes an habanalabs device
  82  */
  83 static int hl_device_release(struct inode *inode, struct file *filp)
  84 {
  85         struct hl_fpriv *hpriv = filp->private_data;
  86
  87         hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
  88         hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
  89
  90         filp->private_data = NULL;
  91
  92         hl_hpriv_put(hpriv);
  93
  94         return 0;
  95 }
  96
  97 /*
  98  * hl_mmap - mmap function for habanalabs device
  99  *
 100  * @*filp: pointer to file structure
 101  * @*vma: pointer to vm_area_struct of the process
 102  *
 103  * Called when process does an mmap on habanalabs device. Call the device's mmap
 104  * function at the end of the common code.
 105  */
 106 static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
 107 {
 108         struct hl_fpriv *hpriv = filp->private_data;
 109
 110         if ((vma->vm_pgoff & HL_MMAP_CB_MASK) == HL_MMAP_CB_MASK) {
 111                 vma->vm_pgoff ^= HL_MMAP_CB_MASK;
 112                 return hl_cb_mmap(hpriv, vma);
 113         }
 114
 115         return -EINVAL;
 116 }
 117
 118 static const struct file_operations hl_ops = {
 119         .owner = THIS_MODULE,
 120         .open = hl_device_open,
 121         .release = hl_device_release,
 122         .mmap = hl_mmap,
 123         .unlocked_ioctl = hl_ioctl,
 124         .compat_ioctl = hl_ioctl
 125 };
 126
 127 /*
 128  * device_setup_cdev - setup cdev and device for habanalabs device
 129  *
 130  * @hdev: pointer to habanalabs device structure
 131  * @hclass: pointer to the class object of the device
 132  * @minor: minor number of the specific device
 133  * @fpos : file operations to install for this device
 134  *
 135  * Create a cdev and a Linux device for habanalabs's device. Need to be
 136  * called at the end of the habanalabs device initialization process,
 137  * because this function exposes the device to the user
 138  */
 139 static int device_setup_cdev(struct hl_device *hdev, struct class *hclass,
 140                                 int minor, const struct file_operations *fops)
 141 {
 142         int err, devno = MKDEV(hdev->major, minor);
 143         struct cdev *hdev_cdev = &hdev->cdev;
 144         char *name;
 145
 146         name = kasprintf(GFP_KERNEL, "hl%d", hdev->id);
 147         if (!name)
 148                 return -ENOMEM;
 149
 150         cdev_init(hdev_cdev, fops);
 151         hdev_cdev->owner = THIS_MODULE;
 152         err = cdev_add(hdev_cdev, devno, 1);
 153         if (err) {
 154                 pr_err("Failed to add char device %s\n", name);
 155                 goto err_cdev_add;
 156         }
 157
 158         hdev->dev = device_create(hclass, NULL, devno, NULL, "%s", name);
 159         if (IS_ERR(hdev->dev)) {
 160                 pr_err("Failed to create device %s\n", name);
 161                 err = PTR_ERR(hdev->dev);
 162                 goto err_device_create;
 163         }
 164
 165         dev_set_drvdata(hdev->dev, hdev);
 166
 167         kfree(name);
 168
 169         return 0;
 170
 171 err_device_create:
 172         cdev_del(hdev_cdev);
 173 err_cdev_add:
 174         kfree(name);
 175         return err;
 176 }
 177
 178 /*
 179  * device_early_init - do some early initialization for the habanalabs device
 180  *
 181  * @hdev: pointer to habanalabs device structure
 182  *
 183  * Install the relevant function pointers and call the early_init function,
 184  * if such a function exists
 185  */
 186 static int device_early_init(struct hl_device *hdev)
 187 {
 188         int rc;
 189
 190         switch (hdev->asic_type) {
 191         case ASIC_GOYA:
 192                 goya_set_asic_funcs(hdev);
 193                 strlcpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name));
 194                 break;
 195         default:
 196                 dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
 197                         hdev->asic_type);
 198                 return -EINVAL;
 199         }
 200
 201         rc = hdev->asic_funcs->early_init(hdev);
 202         if (rc)
 203                 return rc;
 204
 205         rc = hl_asid_init(hdev);
 206         if (rc)
 207                 goto early_fini;
 208
 209         hdev->cq_wq = alloc_workqueue("hl-free-jobs", WQ_UNBOUND, 0);
 210         if (hdev->cq_wq == NULL) {
 211                 dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
 212                 rc = -ENOMEM;
 213                 goto asid_fini;
 214         }
 215
 216         hdev->eq_wq = alloc_workqueue("hl-events", WQ_UNBOUND, 0);
 217         if (hdev->eq_wq == NULL) {
 218                 dev_err(hdev->dev, "Failed to allocate EQ workqueue\n");
 219                 rc = -ENOMEM;
 220                 goto free_cq_wq;
 221         }
 222
 223         hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info),
 224                                         GFP_KERNEL);
 225         if (!hdev->hl_chip_info) {
 226                 rc = -ENOMEM;
 227                 goto free_eq_wq;
 228         }
 229
 230         hl_cb_mgr_init(&hdev->kernel_cb_mgr);
 231
 232         mutex_init(&hdev->fd_open_cnt_lock);
 233         mutex_init(&hdev->send_cpu_message_lock);
 234         INIT_LIST_HEAD(&hdev->hw_queues_mirror_list);
 235         spin_lock_init(&hdev->hw_queues_mirror_lock);
 236         atomic_set(&hdev->in_reset, 0);
 237         atomic_set(&hdev->fd_open_cnt, 0);
 238         atomic_set(&hdev->cs_active_cnt, 0);
 239
 240         return 0;
 241
 242 free_eq_wq:
 243         destroy_workqueue(hdev->eq_wq);
 244 free_cq_wq:
 245         destroy_workqueue(hdev->cq_wq);
 246 asid_fini:
 247         hl_asid_fini(hdev);
 248 early_fini:
 249         if (hdev->asic_funcs->early_fini)
 250                 hdev->asic_funcs->early_fini(hdev);
 251
 252         return rc;
 253 }
 254
 255 /*
 256  * device_early_fini - finalize all that was done in device_early_init
 257  *
 258  * @hdev: pointer to habanalabs device structure
 259  *
 260  */
 261 static void device_early_fini(struct hl_device *hdev)
 262 {
 263         mutex_destroy(&hdev->send_cpu_message_lock);
 264
 265         hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
 266
 267         kfree(hdev->hl_chip_info);
 268
 269         destroy_workqueue(hdev->eq_wq);
 270         destroy_workqueue(hdev->cq_wq);
 271
 272         hl_asid_fini(hdev);
 273
 274         if (hdev->asic_funcs->early_fini)
 275                 hdev->asic_funcs->early_fini(hdev);
 276
 277         mutex_destroy(&hdev->fd_open_cnt_lock);
 278 }
 279
 280 static void set_freq_to_low_job(struct work_struct *work)
 281 {
 282         struct hl_device *hdev = container_of(work, struct hl_device,
 283                                                 work_freq.work);
 284
 285         if (atomic_read(&hdev->fd_open_cnt) == 0)
 286                 hl_device_set_frequency(hdev, PLL_LOW);
 287
 288         schedule_delayed_work(&hdev->work_freq,
 289                         usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
 290 }
 291
 292 static void hl_device_heartbeat(struct work_struct *work)
 293 {
 294         struct hl_device *hdev = container_of(work, struct hl_device,
 295                                                 work_heartbeat.work);
 296
 297         if (hl_device_disabled_or_in_reset(hdev))
 298                 goto reschedule;
 299
 300         if (!hdev->asic_funcs->send_heartbeat(hdev))
 301                 goto reschedule;
 302
 303         dev_err(hdev->dev, "Device heartbeat failed!\n");
 304         hl_device_reset(hdev, true, false);
 305
 306         return;
 307
 308 reschedule:
 309         schedule_delayed_work(&hdev->work_heartbeat,
 310                         usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
 311 }
 312
 313 /*
 314  * device_late_init - do late stuff initialization for the habanalabs device
 315  *
 316  * @hdev: pointer to habanalabs device structure
 317  *
 318  * Do stuff that either needs the device H/W queues to be active or needs
 319  * to happen after all the rest of the initialization is finished
 320  */
 321 static int device_late_init(struct hl_device *hdev)
 322 {
 323         int rc;
 324
 325         INIT_DELAYED_WORK(&hdev->work_freq, set_freq_to_low_job);
 326         hdev->high_pll = hdev->asic_prop.high_pll;
 327
 328         /* force setting to low frequency */
 329         atomic_set(&hdev->curr_pll_profile, PLL_LOW);
 330
 331         if (hdev->pm_mng_profile == PM_AUTO)
 332                 hdev->asic_funcs->set_pll_profile(hdev, PLL_LOW);
 333         else
 334                 hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST);
 335
 336         if (hdev->asic_funcs->late_init) {
 337                 rc = hdev->asic_funcs->late_init(hdev);
 338                 if (rc) {
 339                         dev_err(hdev->dev,
 340                                 "failed late initialization for the H/W\n");
 341                         return rc;
 342                 }
 343         }
 344
 345         schedule_delayed_work(&hdev->work_freq,
 346                         usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
 347
 348         if (hdev->heartbeat) {
 349                 INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat);
 350                 schedule_delayed_work(&hdev->work_heartbeat,
 351                                 usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
 352         }
 353
 354         hdev->late_init_done = true;
 355
 356         return 0;
 357 }
 358
 359 /*
 360  * device_late_fini - finalize all that was done in device_late_init
 361  *
 362  * @hdev: pointer to habanalabs device structure
 363  *
 364  */
 365 static void device_late_fini(struct hl_device *hdev)
 366 {
 367         if (!hdev->late_init_done)
 368                 return;
 369
 370         cancel_delayed_work_sync(&hdev->work_freq);
 371         if (hdev->heartbeat)
 372                 cancel_delayed_work_sync(&hdev->work_heartbeat);
 373
 374         if (hdev->asic_funcs->late_fini)
 375                 hdev->asic_funcs->late_fini(hdev);
 376
 377         hdev->late_init_done = false;
 378 }
 379
 380 /*
 381  * hl_device_set_frequency - set the frequency of the device
 382  *
 383  * @hdev: pointer to habanalabs device structure
 384  * @freq: the new frequency value
 385  *
 386  * Change the frequency if needed.
 387  * We allose to set PLL to low only if there is no user process
 388  * Returns 0 if no change was done, otherwise returns 1;
 389  */
 390 int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq)
 391 {
 392         enum hl_pll_frequency old_freq =
 393                         (freq == PLL_HIGH) ? PLL_LOW : PLL_HIGH;
 394         int ret;
 395
 396         if (hdev->pm_mng_profile == PM_MANUAL)
 397                 return 0;
 398
 399         ret = atomic_cmpxchg(&hdev->curr_pll_profile, old_freq, freq);
 400         if (ret == freq)
 401                 return 0;
 402
 403         /*
 404          * in case we want to lower frequency, check if device is not
 405          * opened. We must have a check here to workaround race condition with
 406          * hl_device_open
 407          */
 408         if ((freq == PLL_LOW) && (atomic_read(&hdev->fd_open_cnt) > 0)) {
 409                 atomic_set(&hdev->curr_pll_profile, PLL_HIGH);
 410                 return 0;
 411         }
 412
 413         dev_dbg(hdev->dev, "Changing device frequency to %s\n",
 414                 freq == PLL_HIGH ? "high" : "low");
 415
 416         hdev->asic_funcs->set_pll_profile(hdev, freq);
 417
 418         return 1;
 419 }
 420
 421 /*
 422  * hl_device_suspend - initiate device suspend
 423  *
 424  * @hdev: pointer to habanalabs device structure
 425  *
 426  * Puts the hw in the suspend state (all asics).
 427  * Returns 0 for success or an error on failure.
 428  * Called at driver suspend.
 429  */
 430 int hl_device_suspend(struct hl_device *hdev)
 431 {
 432         int rc;
 433
 434         pci_save_state(hdev->pdev);
 435
 436         /* Block future CS/VM/JOB completion operations */
 437         rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
 438         if (rc) {
 439                 dev_err(hdev->dev, "Can't suspend while in reset\n");
 440                 return -EIO;
 441         }
 442
 443         /* This blocks all other stuff that is not blocked by in_reset */
 444         hdev->disabled = true;
 445
 446         /*
 447          * Flush anyone that is inside the critical section of enqueue
 448          * jobs to the H/W
 449          */
 450         hdev->asic_funcs->hw_queues_lock(hdev);
 451         hdev->asic_funcs->hw_queues_unlock(hdev);
 452
 453         /* Flush processes that are sending message to CPU */
 454         mutex_lock(&hdev->send_cpu_message_lock);
 455         mutex_unlock(&hdev->send_cpu_message_lock);
 456
 457         rc = hdev->asic_funcs->suspend(hdev);
 458         if (rc)
 459                 dev_err(hdev->dev,
 460                         "Failed to disable PCI access of device CPU\n");
 461
 462         /* Shut down the device */
 463         pci_disable_device(hdev->pdev);
 464         pci_set_power_state(hdev->pdev, PCI_D3hot);
 465
 466         return 0;
 467 }
 468
 469 /*
 470  * hl_device_resume - initiate device resume
 471  *
 472  * @hdev: pointer to habanalabs device structure
 473  *
 474  * Bring the hw back to operating state (all asics).
 475  * Returns 0 for success or an error on failure.
 476  * Called at driver resume.
 477  */
 478 int hl_device_resume(struct hl_device *hdev)
 479 {
 480         int rc;
 481
 482         pci_set_power_state(hdev->pdev, PCI_D0);
 483         pci_restore_state(hdev->pdev);
 484         rc = pci_enable_device_mem(hdev->pdev);
 485         if (rc) {
 486                 dev_err(hdev->dev,
 487                         "Failed to enable PCI device in resume\n");
 488                 return rc;
 489         }
 490
 491         pci_set_master(hdev->pdev);
 492
 493         rc = hdev->asic_funcs->resume(hdev);
 494         if (rc) {
 495                 dev_err(hdev->dev, "Failed to resume device after suspend\n");
 496                 goto disable_device;
 497         }
 498
 499
 500         hdev->disabled = false;
 501         atomic_set(&hdev->in_reset, 0);
 502
 503         rc = hl_device_reset(hdev, true, false);
 504         if (rc) {
 505                 dev_err(hdev->dev, "Failed to reset device during resume\n");
 506                 goto disable_device;
 507         }
 508
 509         return 0;
 510
 511 disable_device:
 512         pci_clear_master(hdev->pdev);
 513         pci_disable_device(hdev->pdev);
 514
 515         return rc;
 516 }
 517
 518 static void device_kill_open_processes(struct hl_device *hdev)
 519 {
 520         u16 pending_total, pending_cnt;
 521         struct task_struct *task = NULL;
 522
 523         if (hdev->pldm)
 524                 pending_total = HL_PLDM_PENDING_RESET_PER_SEC;
 525         else
 526                 pending_total = HL_PENDING_RESET_PER_SEC;
 527
 528         pending_cnt = pending_total;
 529
 530         /* Flush all processes that are inside hl_open */
 531         mutex_lock(&hdev->fd_open_cnt_lock);
 532
 533         while ((atomic_read(&hdev->fd_open_cnt)) && (pending_cnt)) {
 534
 535                 pending_cnt--;
 536
 537                 dev_info(hdev->dev,
 538                         "Can't HARD reset, waiting for user to close FD\n");
 539                 ssleep(1);
 540         }
 541
 542         if (atomic_read(&hdev->fd_open_cnt)) {
 543                 task = get_pid_task(hdev->user_ctx->hpriv->taskpid,
 544                                         PIDTYPE_PID);
 545                 if (task) {
 546                         dev_info(hdev->dev, "Killing user processes\n");
 547                         send_sig(SIGKILL, task, 1);
 548                         msleep(100);
 549
 550                         put_task_struct(task);
 551                 }
 552         }
 553
 554         /* We killed the open users, but because the driver cleans up after the
 555          * user contexts are closed (e.g. mmu mappings), we need to wait again
 556          * to make sure the cleaning phase is finished before continuing with
 557          * the reset
 558          */
 559
 560         pending_cnt = pending_total;
 561
 562         while ((atomic_read(&hdev->fd_open_cnt)) && (pending_cnt)) {
 563
 564                 pending_cnt--;
 565
 566                 ssleep(1);
 567         }
 568
 569         if (atomic_read(&hdev->fd_open_cnt))
 570                 dev_crit(hdev->dev,
 571                         "Going to hard reset with open user contexts\n");
 572
 573         mutex_unlock(&hdev->fd_open_cnt_lock);
 574
 575 }
 576
 577 static void device_hard_reset_pending(struct work_struct *work)
 578 {
 579         struct hl_device_reset_work *device_reset_work =
 580                 container_of(work, struct hl_device_reset_work, reset_work);
 581         struct hl_device *hdev = device_reset_work->hdev;
 582
 583         device_kill_open_processes(hdev);
 584
 585         hl_device_reset(hdev, true, true);
 586
 587         kfree(device_reset_work);
 588 }
 589
 590 /*
 591  * hl_device_reset - reset the device
 592  *
 593  * @hdev: pointer to habanalabs device structure
 594  * @hard_reset: should we do hard reset to all engines or just reset the
 595  *              compute/dma engines
 596  *
 597  * Block future CS and wait for pending CS to be enqueued
 598  * Call ASIC H/W fini
 599  * Flush all completions
 600  * Re-initialize all internal data structures
 601  * Call ASIC H/W init, late_init
 602  * Test queues
 603  * Enable device
 604  *
 605  * Returns 0 for success or an error on failure.
 606  */
 607 int hl_device_reset(struct hl_device *hdev, bool hard_reset,
 608                         bool from_hard_reset_thread)
 609 {
 610         int i, rc;
 611
 612         if (!hdev->init_done) {
 613                 dev_err(hdev->dev,
 614                         "Can't reset before initialization is done\n");
 615                 return 0;
 616         }
 617
 618         /*
 619          * Prevent concurrency in this function - only one reset should be
 620          * done at any given time. Only need to perform this if we didn't
 621          * get from the dedicated hard reset thread
 622          */
 623         if (!from_hard_reset_thread) {
 624                 /* Block future CS/VM/JOB completion operations */
 625                 rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
 626                 if (rc)
 627                         return 0;
 628
 629                 /* This also blocks future CS/VM/JOB completion operations */
 630                 hdev->disabled = true;
 631
 632                 /*
 633                  * Flush anyone that is inside the critical section of enqueue
 634                  * jobs to the H/W
 635                  */
 636                 hdev->asic_funcs->hw_queues_lock(hdev);
 637                 hdev->asic_funcs->hw_queues_unlock(hdev);
 638
 639                 dev_err(hdev->dev, "Going to RESET device!\n");
 640         }
 641
 642 again:
 643         if ((hard_reset) && (!from_hard_reset_thread)) {
 644                 struct hl_device_reset_work *device_reset_work;
 645
 646                 hdev->hard_reset_pending = true;
 647
 648                 if (!hdev->pdev) {
 649                         dev_err(hdev->dev,
 650                                 "Reset action is NOT supported in simulator\n");
 651                         rc = -EINVAL;
 652                         goto out_err;
 653                 }
 654
 655                 device_reset_work = kzalloc(sizeof(*device_reset_work),
 656                                                 GFP_ATOMIC);
 657                 if (!device_reset_work) {
 658                         rc = -ENOMEM;
 659                         goto out_err;
 660                 }
 661
 662                 /*
 663                  * Because the reset function can't run from interrupt or
 664                  * from heartbeat work, we need to call the reset function
 665                  * from a dedicated work
 666                  */
 667                 INIT_WORK(&device_reset_work->reset_work,
 668                                 device_hard_reset_pending);
 669                 device_reset_work->hdev = hdev;
 670                 schedule_work(&device_reset_work->reset_work);
 671
 672                 return 0;
 673         }
 674
 675         if (hard_reset) {
 676                 device_late_fini(hdev);
 677
 678                 /*
 679                  * Now that the heartbeat thread is closed, flush processes
 680                  * which are sending messages to CPU
 681                  */
 682                 mutex_lock(&hdev->send_cpu_message_lock);
 683                 mutex_unlock(&hdev->send_cpu_message_lock);
 684         }
 685
 686         /*
 687          * Halt the engines and disable interrupts so we won't get any more
 688          * completions from H/W and we won't have any accesses from the
 689          * H/W to the host machine
 690          */
 691         hdev->asic_funcs->halt_engines(hdev, hard_reset);
 692
 693         /* Go over all the queues, release all CS and their jobs */
 694         hl_cs_rollback_all(hdev);
 695
 696         /* Release kernel context */
 697         if ((hard_reset) && (hl_ctx_put(hdev->kernel_ctx) == 1))
 698                 hdev->kernel_ctx = NULL;
 699
 700         /* Reset the H/W. It will be in idle state after this returns */
 701         hdev->asic_funcs->hw_fini(hdev, hard_reset);
 702
 703         if (hard_reset) {
 704                 hl_vm_fini(hdev);
 705                 hl_eq_reset(hdev, &hdev->event_queue);
 706         }
 707
 708         /* Re-initialize PI,CI to 0 in all queues (hw queue, cq) */
 709         hl_hw_queue_reset(hdev, hard_reset);
 710         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
 711                 hl_cq_reset(hdev, &hdev->completion_queue[i]);
 712
 713         /* Make sure the context switch phase will run again */
 714         if (hdev->user_ctx) {
 715                 atomic_set(&hdev->user_ctx->thread_ctx_switch_token, 1);
 716                 hdev->user_ctx->thread_ctx_switch_wait_token = 0;
 717         }
 718
 719         /* Finished tear-down, starting to re-initialize */
 720
 721         if (hard_reset) {
 722                 hdev->device_cpu_disabled = false;
 723                 hdev->hard_reset_pending = false;
 724
 725                 if (hdev->kernel_ctx) {
 726                         dev_crit(hdev->dev,
 727                                 "kernel ctx was alive during hard reset, something is terribly wrong\n");
 728                         rc = -EBUSY;
 729                         goto out_err;
 730                 }
 731
 732                 /* Allocate the kernel context */
 733                 hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx),
 734                                                 GFP_KERNEL);
 735                 if (!hdev->kernel_ctx) {
 736                         rc = -ENOMEM;
 737                         goto out_err;
 738                 }
 739
 740                 hdev->user_ctx = NULL;
 741
 742                 rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
 743                 if (rc) {
 744                         dev_err(hdev->dev,
 745                                 "failed to init kernel ctx in hard reset\n");
 746                         kfree(hdev->kernel_ctx);
 747                         hdev->kernel_ctx = NULL;
 748                         goto out_err;
 749                 }
 750         }
 751
 752         rc = hdev->asic_funcs->hw_init(hdev);
 753         if (rc) {
 754                 dev_err(hdev->dev,
 755                         "failed to initialize the H/W after reset\n");
 756                 goto out_err;
 757         }
 758
 759         hdev->disabled = false;
 760
 761         /* Check that the communication with the device is working */
 762         rc = hdev->asic_funcs->test_queues(hdev);
 763         if (rc) {
 764                 dev_err(hdev->dev,
 765                         "Failed to detect if device is alive after reset\n");
 766                 goto out_err;
 767         }
 768
 769         if (hard_reset) {
 770                 rc = device_late_init(hdev);
 771                 if (rc) {
 772                         dev_err(hdev->dev,
 773                                 "Failed late init after hard reset\n");
 774                         goto out_err;
 775                 }
 776
 777                 rc = hl_vm_init(hdev);
 778                 if (rc) {
 779                         dev_err(hdev->dev,
 780                                 "Failed to init memory module after hard reset\n");
 781                         goto out_err;
 782                 }
 783
 784                 hl_set_max_power(hdev, hdev->max_power);
 785         } else {
 786                 rc = hdev->asic_funcs->soft_reset_late_init(hdev);
 787                 if (rc) {
 788                         dev_err(hdev->dev,
 789                                 "Failed late init after soft reset\n");
 790                         goto out_err;
 791                 }
 792         }
 793
 794         atomic_set(&hdev->in_reset, 0);
 795
 796         if (hard_reset)
 797                 hdev->hard_reset_cnt++;
 798         else
 799                 hdev->soft_reset_cnt++;
 800
 801         return 0;
 802
 803 out_err:
 804         hdev->disabled = true;
 805
 806         if (hard_reset) {
 807                 dev_err(hdev->dev,
 808                         "Failed to reset! Device is NOT usable\n");
 809                 hdev->hard_reset_cnt++;
 810         } else {
 811                 dev_err(hdev->dev,
 812                         "Failed to do soft-reset, trying hard reset\n");
 813                 hdev->soft_reset_cnt++;
 814                 hard_reset = true;
 815                 goto again;
 816         }
 817
 818         atomic_set(&hdev->in_reset, 0);
 819
 820         return rc;
 821 }
 822
 823 /*
 824  * hl_device_init - main initialization function for habanalabs device
 825  *
 826  * @hdev: pointer to habanalabs device structure
 827  *
 828  * Allocate an id for the device, do early initialization and then call the
 829  * ASIC specific initialization functions. Finally, create the cdev and the
 830  * Linux device to expose it to the user
 831  */
 832 int hl_device_init(struct hl_device *hdev, struct class *hclass)
 833 {
 834         int i, rc, cq_ready_cnt;
 835
 836         /* Create device */
 837         rc = device_setup_cdev(hdev, hclass, hdev->id, &hl_ops);
 838
 839         if (rc)
 840                 goto out_disabled;
 841
 842         /* Initialize ASIC function pointers and perform early init */
 843         rc = device_early_init(hdev);
 844         if (rc)
 845                 goto release_device;
 846
 847         /*
 848          * Start calling ASIC initialization. First S/W then H/W and finally
 849          * late init
 850          */
 851         rc = hdev->asic_funcs->sw_init(hdev);
 852         if (rc)
 853                 goto early_fini;
 854
 855         /*
 856          * Initialize the H/W queues. Must be done before hw_init, because
 857          * there the addresses of the kernel queue are being written to the
 858          * registers of the device
 859          */
 860         rc = hl_hw_queues_create(hdev);
 861         if (rc) {
 862                 dev_err(hdev->dev, "failed to initialize kernel queues\n");
 863                 goto sw_fini;
 864         }
 865
 866         /*
 867          * Initialize the completion queues. Must be done before hw_init,
 868          * because there the addresses of the completion queues are being
 869          * passed as arguments to request_irq
 870          */
 871         hdev->completion_queue =
 872                         kcalloc(hdev->asic_prop.completion_queues_count,
 873                                 sizeof(*hdev->completion_queue), GFP_KERNEL);
 874
 875         if (!hdev->completion_queue) {
 876                 dev_err(hdev->dev, "failed to allocate completion queues\n");
 877                 rc = -ENOMEM;
 878                 goto hw_queues_destroy;
 879         }
 880
 881         for (i = 0, cq_ready_cnt = 0;
 882                         i < hdev->asic_prop.completion_queues_count;
 883                         i++, cq_ready_cnt++) {
 884                 rc = hl_cq_init(hdev, &hdev->completion_queue[i], i);
 885                 if (rc) {
 886                         dev_err(hdev->dev,
 887                                 "failed to initialize completion queue\n");
 888                         goto cq_fini;
 889                 }
 890         }
 891
 892         /*
 893          * Initialize the event queue. Must be done before hw_init,
 894          * because there the address of the event queue is being
 895          * passed as argument to request_irq
 896          */
 897         rc = hl_eq_init(hdev, &hdev->event_queue);
 898         if (rc) {
 899                 dev_err(hdev->dev, "failed to initialize event queue\n");
 900                 goto cq_fini;
 901         }
 902
 903         /* Allocate the kernel context */
 904         hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL);
 905         if (!hdev->kernel_ctx) {
 906                 rc = -ENOMEM;
 907                 goto eq_fini;
 908         }
 909
 910         hdev->user_ctx = NULL;
 911
 912         rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
 913         if (rc) {
 914                 dev_err(hdev->dev, "failed to initialize kernel context\n");
 915                 goto free_ctx;
 916         }
 917
 918         rc = hl_cb_pool_init(hdev);
 919         if (rc) {
 920                 dev_err(hdev->dev, "failed to initialize CB pool\n");
 921                 goto release_ctx;
 922         }
 923
 924         rc = hl_sysfs_init(hdev);
 925         if (rc) {
 926                 dev_err(hdev->dev, "failed to initialize sysfs\n");
 927                 goto free_cb_pool;
 928         }
 929
 930         hl_debugfs_add_device(hdev);
 931
 932         if (hdev->asic_funcs->get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
 933                 dev_info(hdev->dev,
 934                         "H/W state is dirty, must reset before initializing\n");
 935                 hdev->asic_funcs->hw_fini(hdev, true);
 936         }
 937
 938         rc = hdev->asic_funcs->hw_init(hdev);
 939         if (rc) {
 940                 dev_err(hdev->dev, "failed to initialize the H/W\n");
 941                 rc = 0;
 942                 goto out_disabled;
 943         }
 944
 945         hdev->disabled = false;
 946
 947         /* Check that the communication with the device is working */
 948         rc = hdev->asic_funcs->test_queues(hdev);
 949         if (rc) {
 950                 dev_err(hdev->dev, "Failed to detect if device is alive\n");
 951                 rc = 0;
 952                 goto out_disabled;
 953         }
 954
 955         /* After test_queues, KMD can start sending messages to device CPU */
 956
 957         rc = device_late_init(hdev);
 958         if (rc) {
 959                 dev_err(hdev->dev, "Failed late initialization\n");
 960                 rc = 0;
 961                 goto out_disabled;
 962         }
 963
 964         dev_info(hdev->dev, "Found %s device with %lluGB DRAM\n",
 965                 hdev->asic_name,
 966                 hdev->asic_prop.dram_size / 1024 / 1024 / 1024);
 967
 968         rc = hl_vm_init(hdev);
 969         if (rc) {
 970                 dev_err(hdev->dev, "Failed to initialize memory module\n");
 971                 rc = 0;
 972                 goto out_disabled;
 973         }
 974
 975         /*
 976          * hl_hwmon_init must be called after device_late_init, because only
 977          * there we get the information from the device about which
 978          * hwmon-related sensors the device supports
 979          */
 980         rc = hl_hwmon_init(hdev);
 981         if (rc) {
 982                 dev_err(hdev->dev, "Failed to initialize hwmon\n");
 983                 rc = 0;
 984                 goto out_disabled;
 985         }
 986
 987         dev_notice(hdev->dev,
 988                 "Successfully added device to habanalabs driver\n");
 989
 990         hdev->init_done = true;
 991
 992         return 0;
 993
 994 free_cb_pool:
 995         hl_cb_pool_fini(hdev);
 996 release_ctx:
 997         if (hl_ctx_put(hdev->kernel_ctx) != 1)
 998                 dev_err(hdev->dev,
 999                         "kernel ctx is still alive on initialization failure\n");
1000 free_ctx:
1001         kfree(hdev->kernel_ctx);
1002 eq_fini:
1003         hl_eq_fini(hdev, &hdev->event_queue);
1004 cq_fini:
1005         for (i = 0 ; i < cq_ready_cnt ; i++)
1006                 hl_cq_fini(hdev, &hdev->completion_queue[i]);
1007         kfree(hdev->completion_queue);
1008 hw_queues_destroy:
1009         hl_hw_queues_destroy(hdev);
1010 sw_fini:
1011         hdev->asic_funcs->sw_fini(hdev);
1012 early_fini:
1013         device_early_fini(hdev);
1014 release_device:
1015         device_destroy(hclass, hdev->dev->devt);
1016         cdev_del(&hdev->cdev);
1017 out_disabled:
1018         hdev->disabled = true;
1019         if (hdev->pdev)
1020                 dev_err(&hdev->pdev->dev,
1021                         "Failed to initialize hl%d. Device is NOT usable !\n",
1022                         hdev->id);
1023         else
1024                 pr_err("Failed to initialize hl%d. Device is NOT usable !\n",
1025                         hdev->id);
1026
1027         return rc;
1028 }
1029
1030 /*
1031  * hl_device_fini - main tear-down function for habanalabs device
1032  *
1033  * @hdev: pointer to habanalabs device structure
1034  *
1035  * Destroy the device, call ASIC fini functions and release the id
1036  */
1037 void hl_device_fini(struct hl_device *hdev)
1038 {
1039         int i, rc;
1040         ktime_t timeout;
1041
1042         dev_info(hdev->dev, "Removing device\n");
1043
1044         /*
1045          * This function is competing with the reset function, so try to
1046          * take the reset atomic and if we are already in middle of reset,
1047          * wait until reset function is finished. Reset function is designed
1048          * to always finish (could take up to a few seconds in worst case).
1049          */
1050
1051         timeout = ktime_add_us(ktime_get(),
1052                                 HL_PENDING_RESET_PER_SEC * 1000 * 1000 * 4);
1053         rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
1054         while (rc) {
1055                 usleep_range(50, 200);
1056                 rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
1057                 if (ktime_compare(ktime_get(), timeout) > 0) {
1058                         WARN(1, "Failed to remove device because reset function did not finish\n");
1059                         return;
1060                 }
1061         }
1062
1063         /* Mark device as disabled */
1064         hdev->disabled = true;
1065
1066         /*
1067          * Flush anyone that is inside the critical section of enqueue
1068          * jobs to the H/W
1069          */
1070         hdev->asic_funcs->hw_queues_lock(hdev);
1071         hdev->asic_funcs->hw_queues_unlock(hdev);
1072
1073         hdev->hard_reset_pending = true;
1074
1075         device_kill_open_processes(hdev);
1076
1077         hl_hwmon_fini(hdev);
1078
1079         device_late_fini(hdev);
1080
1081         hl_debugfs_remove_device(hdev);
1082
1083         hl_sysfs_fini(hdev);
1084
1085         /*
1086          * Halt the engines and disable interrupts so we won't get any more
1087          * completions from H/W and we won't have any accesses from the
1088          * H/W to the host machine
1089          */
1090         hdev->asic_funcs->halt_engines(hdev, true);
1091
1092         /* Go over all the queues, release all CS and their jobs */
1093         hl_cs_rollback_all(hdev);
1094
1095         hl_cb_pool_fini(hdev);
1096
1097         /* Release kernel context */
1098         if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
1099                 dev_err(hdev->dev, "kernel ctx is still alive\n");
1100
1101         /* Reset the H/W. It will be in idle state after this returns */
1102         hdev->asic_funcs->hw_fini(hdev, true);
1103
1104         hl_vm_fini(hdev);
1105
1106         hl_eq_fini(hdev, &hdev->event_queue);
1107
1108         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1109                 hl_cq_fini(hdev, &hdev->completion_queue[i]);
1110         kfree(hdev->completion_queue);
1111
1112         hl_hw_queues_destroy(hdev);
1113
1114         /* Call ASIC S/W finalize function */
1115         hdev->asic_funcs->sw_fini(hdev);
1116
1117         device_early_fini(hdev);
1118
1119         /* Hide device from user */
1120         device_destroy(hdev->dev->class, hdev->dev->devt);
1121         cdev_del(&hdev->cdev);
1122
1123         pr_info("removed device successfully\n");
1124 }
1125
1126 /*
1127  * hl_poll_timeout_memory - Periodically poll a host memory address
1128  *                              until it is not zero or a timeout occurs
1129  * @hdev: pointer to habanalabs device structure
1130  * @addr: Address to poll
1131  * @timeout_us: timeout in us
1132  * @val: Variable to read the value into
1133  *
1134  * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
1135  * case, the last read value at @addr is stored in @val. Must not
1136  * be called from atomic context if sleep_us or timeout_us are used.
1137  *
1138  * The function sleeps for 100us with timeout value of
1139  * timeout_us
1140  */
1141 int hl_poll_timeout_memory(struct hl_device *hdev, u64 addr,
1142                                 u32 timeout_us, u32 *val)
1143 {
1144         /*
1145          * address in this function points always to a memory location in the
1146          * host's (server's) memory. That location is updated asynchronously
1147          * either by the direct access of the device or by another core
1148          */
1149         u32 *paddr = (u32 *) (uintptr_t) addr;
1150         ktime_t timeout;
1151
1152         /* timeout should be longer when working with simulator */
1153         if (!hdev->pdev)
1154                 timeout_us *= 10;
1155
1156         timeout = ktime_add_us(ktime_get(), timeout_us);
1157
1158         might_sleep();
1159
1160         for (;;) {
1161                 /*
1162                  * Flush CPU read/write buffers to make sure we read updates
1163                  * done by other cores or by the device
1164                  */
1165                 mb();
1166                 *val = *paddr;
1167                 if (*val)
1168                         break;
1169                 if (ktime_compare(ktime_get(), timeout) > 0) {
1170                         *val = *paddr;
1171                         break;
1172                 }
1173                 usleep_range((100 >> 2) + 1, 100);
1174         }
1175
1176         return *val ? 0 : -ETIMEDOUT;
1177 }
1178
1179 /*
1180  * hl_poll_timeout_devicememory - Periodically poll a device memory address
1181  *                                until it is not zero or a timeout occurs
1182  * @hdev: pointer to habanalabs device structure
1183  * @addr: Device address to poll
1184  * @timeout_us: timeout in us
1185  * @val: Variable to read the value into
1186  *
1187  * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
1188  * case, the last read value at @addr is stored in @val. Must not
1189  * be called from atomic context if sleep_us or timeout_us are used.
1190  *
1191  * The function sleeps for 100us with timeout value of
1192  * timeout_us
1193  */
1194 int hl_poll_timeout_device_memory(struct hl_device *hdev, void __iomem *addr,
1195                                 u32 timeout_us, u32 *val)
1196 {
1197         ktime_t timeout = ktime_add_us(ktime_get(), timeout_us);
1198
1199         might_sleep();
1200
1201         for (;;) {
1202                 *val = readl(addr);
1203                 if (*val)
1204                         break;
1205                 if (ktime_compare(ktime_get(), timeout) > 0) {
1206                         *val = readl(addr);
1207                         break;
1208                 }
1209                 usleep_range((100 >> 2) + 1, 100);
1210         }
1211
1212         return *val ? 0 : -ETIMEDOUT;
1213 }
1214
1215 /*
1216  * MMIO register access helper functions.
1217  */
1218
1219 /*
1220  * hl_rreg - Read an MMIO register
1221  *
1222  * @hdev: pointer to habanalabs device structure
1223  * @reg: MMIO register offset (in bytes)
1224  *
1225  * Returns the value of the MMIO register we are asked to read
1226  *
1227  */
1228 inline u32 hl_rreg(struct hl_device *hdev, u32 reg)
1229 {
1230         return readl(hdev->rmmio + reg);
1231 }
1232
1233 /*
1234  * hl_wreg - Write to an MMIO register
1235  *
1236  * @hdev: pointer to habanalabs device structure
1237  * @reg: MMIO register offset (in bytes)
1238  * @val: 32-bit value
1239  *
1240  * Writes the 32-bit value into the MMIO register
1241  *
1242  */
1243 inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val)
1244 {
1245         writel(val, hdev->rmmio + reg);
1246 }