linux/drivers/misc/habanalabs/common/device.c
// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#define pr_fmt(fmt)                     "habanalabs: " fmt

#include "habanalabs.h"

#include <linux/pci.h>
#include <linux/hwmon.h>
#include <uapi/misc/habanalabs.h>

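/*
 * hl_device_status - get the current status of the device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Derive the device status from the reset/needs-reset/disabled flags.
 * The checks are ordered by priority: an in-progress reset overrides
 * everything else.
 */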
enum hl_device_status hl_device_status(struct hl_device *hdev)
{
        enum hl_device_status status;

        if (atomic_read(&hdev->in_reset))
                status = HL_DEVICE_STATUS_IN_RESET;
        else if (hdev->needs_reset)
                status = HL_DEVICE_STATUS_NEEDS_RESET;
        else if (hdev->disabled)
                status = HL_DEVICE_STATUS_MALFUNCTION;
        else
                status = HL_DEVICE_STATUS_OPERATIONAL;

        return status;
}

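/*
 * hl_device_operational - check if the device is usable
 *
 * @hdev: pointer to habanalabs device structure
 * @status: optional pointer that receives the current device status
 *
 * Returns true if the device can accept new work, false if it is in
 * reset, malfunctioning or waiting for a reset.
 */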
bool hl_device_operational(struct hl_device *hdev,
                enum hl_device_status *status)
{
        enum hl_device_status current_status;

        current_status = hl_device_status(hdev);
        if (status)
                *status = current_status;

        switch (current_status) {
        case HL_DEVICE_STATUS_IN_RESET:
        case HL_DEVICE_STATUS_MALFUNCTION:
        case HL_DEVICE_STATUS_NEEDS_RESET:
                return false;
        case HL_DEVICE_STATUS_OPERATIONAL:
        default:
                return true;
        }
}

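/*
 * hpriv_release - release function for the file private data
 *
 * @ref: pointer to the kref field of the hl_fpriv structure
 *
 * Called when the last reference to the file private data is dropped.
 * Tears down the per-process state and, if the device is not idle or is
 * configured to reset upon device release, triggers a device reset.
 */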
static void hpriv_release(struct kref *ref)
{
        u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
        bool device_is_idle = true;
        struct hl_fpriv *hpriv;
        struct hl_device *hdev;

        hpriv = container_of(ref, struct hl_fpriv, refcount);

        hdev = hpriv->hdev;

        put_pid(hpriv->taskpid);

        hl_debugfs_remove_file(hpriv);

        mutex_destroy(&hpriv->restore_phase_mutex);

        mutex_lock(&hdev->fpriv_list_lock);
        list_del(&hpriv->dev_node);
        hdev->compute_ctx = NULL;
        mutex_unlock(&hdev->fpriv_list_lock);

        kfree(hpriv);

        if ((!hdev->pldm) && (hdev->pdev) &&
                        (!hdev->asic_funcs->is_device_idle(hdev,
                                idle_mask,
                                HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL))) {
                dev_err(hdev->dev,
                        "device not idle after user context is closed (0x%llx_%llx)\n",
                        idle_mask[1], idle_mask[0]);

                device_is_idle = false;
        }

        if ((hdev->reset_if_device_not_idle && !device_is_idle)
                        || hdev->reset_upon_device_release)
                hl_device_reset(hdev, HL_RESET_DEVICE_RELEASE);
}

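/*
 * hl_hpriv_get - increment the reference count of the file private data
 *
 * @hpriv: pointer to the file private data structure
 */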
void hl_hpriv_get(struct hl_fpriv *hpriv)
{
        kref_get(&hpriv->refcount);
}

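/*
 * hl_hpriv_put - decrement the reference count of the file private data
 *
 * @hpriv: pointer to the file private data structure
 *
 * Returns 1 if the last reference was dropped and hpriv_release() was
 * called, 0 otherwise.
 */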
int hl_hpriv_put(struct hl_fpriv *hpriv)
{
        return kref_put(&hpriv->refcount, hpriv_release);
}

/*
 * hl_device_release - release function for habanalabs device
 *
 * @inode: pointer to inode structure
 * @filp: pointer to file structure
 *
 * Called when process closes a habanalabs device
 */
static int hl_device_release(struct inode *inode, struct file *filp)
{
        struct hl_fpriv *hpriv = filp->private_data;
        struct hl_device *hdev = hpriv->hdev;

        filp->private_data = NULL;

        if (!hdev) {
                pr_crit("Closing FD after device was removed. Memory leak will occur and it is advised to reboot.\n");
                put_pid(hpriv->taskpid);
                return 0;
        }

        /* Each pending user interrupt holds the user's context, hence we
         * must release them all before calling hl_ctx_mgr_fini().
         */
        hl_release_pending_user_interrupts(hpriv->hdev);

        hl_cb_mgr_fini(hdev, &hpriv->cb_mgr);
        hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);

        if (!hl_hpriv_put(hpriv))
                dev_warn(hdev->dev,
                        "Device is still in use because there are live CS and/or memory mappings\n");

        hdev->last_open_session_duration_jif =
                jiffies - hdev->last_successful_open_jif;

        return 0;
}

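/*
 * hl_device_release_ctrl - release function for habanalabs control device
 *
 * @inode: pointer to inode structure
 * @filp: pointer to file structure
 *
 * Called when process closes a habanalabs control device
 */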
static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
{
        struct hl_fpriv *hpriv = filp->private_data;
        struct hl_device *hdev = hpriv->hdev;

        filp->private_data = NULL;

        if (!hdev) {
                pr_err("Closing FD after device was removed\n");
                goto out;
        }

        mutex_lock(&hdev->fpriv_list_lock);
        list_del(&hpriv->dev_node);
        mutex_unlock(&hdev->fpriv_list_lock);
out:
        put_pid(hpriv->taskpid);

        kfree(hpriv);

        return 0;
}

/*
 * hl_mmap - mmap function for habanalabs device
 *
 * @filp: pointer to file structure
 * @vma: pointer to vm_area_struct of the process
 *
 * Called when process does an mmap on habanalabs device. Call the device's mmap
 * function at the end of the common code.
 */
static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
{
        struct hl_fpriv *hpriv = filp->private_data;
        struct hl_device *hdev = hpriv->hdev;
        unsigned long vm_pgoff;

        if (!hdev) {
                pr_err_ratelimited("Trying to mmap after device was removed! Please close FD\n");
                return -ENODEV;
        }

        vm_pgoff = vma->vm_pgoff;
        vma->vm_pgoff = HL_MMAP_OFFSET_VALUE_GET(vm_pgoff);

        switch (vm_pgoff & HL_MMAP_TYPE_MASK) {
        case HL_MMAP_TYPE_CB:
                return hl_cb_mmap(hpriv, vma);

        case HL_MMAP_TYPE_BLOCK:
                return hl_hw_block_mmap(hpriv, vma);
        }

        return -EINVAL;
}

static const struct file_operations hl_ops = {
        .owner = THIS_MODULE,
        .open = hl_device_open,
        .release = hl_device_release,
        .mmap = hl_mmap,
        .unlocked_ioctl = hl_ioctl,
        .compat_ioctl = hl_ioctl
};

static const struct file_operations hl_ctrl_ops = {
        .owner = THIS_MODULE,
        .open = hl_device_open_ctrl,
        .release = hl_device_release_ctrl,
        .unlocked_ioctl = hl_ioctl_control,
        .compat_ioctl = hl_ioctl_control
};

static void device_release_func(struct device *dev)
{
        kfree(dev);
}

/*
 * device_init_cdev - Initialize cdev and device for habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 * @hclass: pointer to the class object of the device
 * @minor: minor number of the specific device
 * @fops: file operations to install for this device
 * @name: name of the device as it will appear in the filesystem
 * @cdev: pointer to the char device object that will be initialized
 * @dev: pointer to the device object that will be initialized
 *
 * Initialize a cdev and a Linux device for a habanalabs device.
 */
static int device_init_cdev(struct hl_device *hdev, struct class *hclass,
                                int minor, const struct file_operations *fops,
                                char *name, struct cdev *cdev,
                                struct device **dev)
{
        cdev_init(cdev, fops);
        cdev->owner = THIS_MODULE;

        *dev = kzalloc(sizeof(**dev), GFP_KERNEL);
        if (!*dev)
                return -ENOMEM;

        device_initialize(*dev);
        (*dev)->devt = MKDEV(hdev->major, minor);
        (*dev)->class = hclass;
        (*dev)->release = device_release_func;
        dev_set_drvdata(*dev, hdev);
        dev_set_name(*dev, "%s", name);

        return 0;
}

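/*
 * device_cdev_sysfs_add - add char devices and sysfs nodes
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Add the compute and control char devices to the system and create the
 * sysfs nodes. The sysfs initialization must be done after the char
 * devices are added, hence the ordering below.
 */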
static int device_cdev_sysfs_add(struct hl_device *hdev)
{
        int rc;

        rc = cdev_device_add(&hdev->cdev, hdev->dev);
        if (rc) {
                dev_err(hdev->dev,
                        "failed to add a char device to the system\n");
                return rc;
        }

        rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl);
        if (rc) {
                dev_err(hdev->dev,
                        "failed to add a control char device to the system\n");
                goto delete_cdev_device;
        }

        /* hl_sysfs_init() must be done after adding the device to the system */
        rc = hl_sysfs_init(hdev);
        if (rc) {
                dev_err(hdev->dev, "failed to initialize sysfs\n");
                goto delete_ctrl_cdev_device;
        }

        hdev->cdev_sysfs_created = true;

        return 0;

delete_ctrl_cdev_device:
        cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
delete_cdev_device:
        cdev_device_del(&hdev->cdev, hdev->dev);
        return rc;
}

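/*
 * device_cdev_sysfs_del - remove char devices and sysfs nodes
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Mirror of device_cdev_sysfs_add(). The device references are dropped
 * even if the char devices were never added to the system.
 */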
static void device_cdev_sysfs_del(struct hl_device *hdev)
{
        if (!hdev->cdev_sysfs_created)
                goto put_devices;

        hl_sysfs_fini(hdev);
        cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
        cdev_device_del(&hdev->cdev, hdev->dev);

put_devices:
        put_device(hdev->dev);
        put_device(hdev->dev_ctrl);
}

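/*
 * device_hard_reset_pending - work function for the delayed hard reset
 *
 * @work: pointer to the embedded work_struct
 *
 * Executes the hard reset from a dedicated work queue. If the reset fails
 * because user processes are still alive (-EBUSY), the work re-queues
 * itself, unless the driver is being removed.
 */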
static void device_hard_reset_pending(struct work_struct *work)
{
        struct hl_device_reset_work *device_reset_work =
                container_of(work, struct hl_device_reset_work,
                                reset_work.work);
        struct hl_device *hdev = device_reset_work->hdev;
        int rc;

        rc = hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FROM_RESET_THREAD);
        if ((rc == -EBUSY) && !hdev->device_fini_pending) {
                dev_info(hdev->dev,
                        "Could not reset device. Will try again in %u seconds",
                        HL_PENDING_RESET_PER_SEC);

                queue_delayed_work(device_reset_work->wq,
                        &device_reset_work->reset_work,
                        msecs_to_jiffies(HL_PENDING_RESET_PER_SEC * 1000));
        }
}

/*
 * device_early_init - do some early initialization for the habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Install the relevant function pointers and call the early_init function,
 * if such a function exists
 */
static int device_early_init(struct hl_device *hdev)
{
        int i, rc;
        char workq_name[32];

        switch (hdev->asic_type) {
        case ASIC_GOYA:
                goya_set_asic_funcs(hdev);
                strscpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name));
                break;
        case ASIC_GAUDI:
                gaudi_set_asic_funcs(hdev);
                strscpy(hdev->asic_name, "GAUDI", sizeof(hdev->asic_name));
                break;
        case ASIC_GAUDI_SEC:
                gaudi_set_asic_funcs(hdev);
                strscpy(hdev->asic_name, "GAUDI SEC", sizeof(hdev->asic_name));
                break;
        default:
                dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
                        hdev->asic_type);
                return -EINVAL;
        }

        rc = hdev->asic_funcs->early_init(hdev);
        if (rc)
                return rc;

        rc = hl_asid_init(hdev);
        if (rc)
                goto early_fini;

        if (hdev->asic_prop.completion_queues_count) {
                hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count,
                                sizeof(*hdev->cq_wq),
                                GFP_KERNEL);
                if (!hdev->cq_wq) {
                        rc = -ENOMEM;
                        goto asid_fini;
                }
        }

        for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
                snprintf(workq_name, sizeof(workq_name), "hl-free-jobs-%u",
                                (u32) i);
                hdev->cq_wq[i] = create_singlethread_workqueue(workq_name);
                if (!hdev->cq_wq[i]) {
                        dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
                        rc = -ENOMEM;
                        goto free_cq_wq;
                }
        }

        hdev->eq_wq = alloc_workqueue("hl-events", WQ_UNBOUND, 0);
        if (!hdev->eq_wq) {
                dev_err(hdev->dev, "Failed to allocate EQ workqueue\n");
                rc = -ENOMEM;
                goto free_cq_wq;
        }

        hdev->sob_reset_wq = alloc_workqueue("hl-sob-reset", WQ_UNBOUND, 0);
        if (!hdev->sob_reset_wq) {
                dev_err(hdev->dev,
                        "Failed to allocate SOB reset workqueue\n");
                rc = -ENOMEM;
                goto free_eq_wq;
        }

        hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info),
                                        GFP_KERNEL);
        if (!hdev->hl_chip_info) {
                rc = -ENOMEM;
                goto free_sob_reset_wq;
        }

        rc = hl_mmu_if_set_funcs(hdev);
        if (rc)
                goto free_chip_info;

        hl_cb_mgr_init(&hdev->kernel_cb_mgr);

        hdev->device_reset_work.wq =
                        create_singlethread_workqueue("hl_device_reset");
        if (!hdev->device_reset_work.wq) {
                rc = -ENOMEM;
                dev_err(hdev->dev, "Failed to create device reset WQ\n");
                goto free_cb_mgr;
        }

        INIT_DELAYED_WORK(&hdev->device_reset_work.reset_work,
                        device_hard_reset_pending);
        hdev->device_reset_work.hdev = hdev;
        hdev->device_fini_pending = 0;

        mutex_init(&hdev->send_cpu_message_lock);
        mutex_init(&hdev->debug_lock);
        INIT_LIST_HEAD(&hdev->cs_mirror_list);
        spin_lock_init(&hdev->cs_mirror_lock);
        INIT_LIST_HEAD(&hdev->fpriv_list);
        mutex_init(&hdev->fpriv_list_lock);
        atomic_set(&hdev->in_reset, 0);

        return 0;

free_cb_mgr:
        hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
free_chip_info:
        kfree(hdev->hl_chip_info);
free_sob_reset_wq:
        destroy_workqueue(hdev->sob_reset_wq);
free_eq_wq:
        destroy_workqueue(hdev->eq_wq);
free_cq_wq:
        for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
                if (hdev->cq_wq[i])
                        destroy_workqueue(hdev->cq_wq[i]);
        kfree(hdev->cq_wq);
asid_fini:
        hl_asid_fini(hdev);
early_fini:
        if (hdev->asic_funcs->early_fini)
                hdev->asic_funcs->early_fini(hdev);

        return rc;
}

/*
 * device_early_fini - finalize all that was done in device_early_init
 *
 * @hdev: pointer to habanalabs device structure
 */
static void device_early_fini(struct hl_device *hdev)
{
        int i;

        mutex_destroy(&hdev->debug_lock);
        mutex_destroy(&hdev->send_cpu_message_lock);

        mutex_destroy(&hdev->fpriv_list_lock);

        hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);

        kfree(hdev->hl_chip_info);

        destroy_workqueue(hdev->sob_reset_wq);
        destroy_workqueue(hdev->eq_wq);
        destroy_workqueue(hdev->device_reset_work.wq);

        for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
                destroy_workqueue(hdev->cq_wq[i]);
        kfree(hdev->cq_wq);

        hl_asid_fini(hdev);

        if (hdev->asic_funcs->early_fini)
                hdev->asic_funcs->early_fini(hdev);
}

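/*
 * set_freq_to_low_job - periodic work to lower the device frequency
 *
 * @work: pointer to the embedded work_struct
 *
 * If no compute context is open, drop the PLL profile to low to save
 * power, then re-schedule itself.
 */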
static void set_freq_to_low_job(struct work_struct *work)
{
        struct hl_device *hdev = container_of(work, struct hl_device,
                                                work_freq.work);

        mutex_lock(&hdev->fpriv_list_lock);

        if (!hdev->compute_ctx)
                hl_device_set_frequency(hdev, PLL_LOW);

        mutex_unlock(&hdev->fpriv_list_lock);

        schedule_delayed_work(&hdev->work_freq,
                        usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
}

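/*
 * hl_device_heartbeat - periodic work to check device liveness
 *
 * @work: pointer to the embedded work_struct
 *
 * Send a heartbeat message to the device CPU. If the device fails to
 * answer, trigger a hard reset; otherwise re-schedule the check.
 */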
static void hl_device_heartbeat(struct work_struct *work)
{
        struct hl_device *hdev = container_of(work, struct hl_device,
                                                work_heartbeat.work);

        if (!hl_device_operational(hdev, NULL))
                goto reschedule;

        if (!hdev->asic_funcs->send_heartbeat(hdev))
                goto reschedule;

        dev_err(hdev->dev, "Device heartbeat failed!\n");
        hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_HEARTBEAT);

        return;

reschedule:
        schedule_delayed_work(&hdev->work_heartbeat,
                        usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
}

/*
 * device_late_init - do late initialization for the habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Do things that either need the device H/W queues to be active or need
 * to happen after all the rest of the initialization is finished
 */
static int device_late_init(struct hl_device *hdev)
{
        int rc;

        if (hdev->asic_funcs->late_init) {
                rc = hdev->asic_funcs->late_init(hdev);
                if (rc) {
                        dev_err(hdev->dev,
                                "failed late initialization for the H/W\n");
                        return rc;
                }
        }

        hdev->high_pll = hdev->asic_prop.high_pll;

        /* force setting to low frequency */
        hdev->curr_pll_profile = PLL_LOW;

        if (hdev->pm_mng_profile == PM_AUTO)
                hdev->asic_funcs->set_pll_profile(hdev, PLL_LOW);
        else
                hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST);

        INIT_DELAYED_WORK(&hdev->work_freq, set_freq_to_low_job);
        schedule_delayed_work(&hdev->work_freq,
                        usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));

        if (hdev->heartbeat) {
                INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat);
                schedule_delayed_work(&hdev->work_heartbeat,
                                usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
        }

        hdev->late_init_done = true;

        return 0;
}

/*
 * device_late_fini - finalize all that was done in device_late_init
 *
 * @hdev: pointer to habanalabs device structure
 */
static void device_late_fini(struct hl_device *hdev)
{
        if (!hdev->late_init_done)
                return;

        cancel_delayed_work_sync(&hdev->work_freq);
        if (hdev->heartbeat)
                cancel_delayed_work_sync(&hdev->work_heartbeat);

        if (hdev->asic_funcs->late_fini)
                hdev->asic_funcs->late_fini(hdev);

        hdev->late_init_done = false;
}

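/*
 * hl_device_utilization - read the current device utilization
 *
 * @hdev: pointer to habanalabs device structure
 * @utilization: pointer that receives the utilization percentage
 *
 * The utilization is derived from the current power consumption, scaled
 * linearly between the DC (idle) power and the maximum power.
 */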
int hl_device_utilization(struct hl_device *hdev, u32 *utilization)
{
        u64 max_power, curr_power, dc_power, dividend;
        int rc;

        max_power = hdev->asic_prop.max_power_default;
        dc_power = hdev->asic_prop.dc_power_default;
        rc = hl_fw_cpucp_power_get(hdev, &curr_power);
        if (rc)
                return rc;

        curr_power = clamp(curr_power, dc_power, max_power);

        dividend = (curr_power - dc_power) * 100;
        *utilization = (u32) div_u64(dividend, (max_power - dc_power));

        return 0;
}

/*
 * hl_device_set_frequency - set the frequency of the device
 *
 * @hdev: pointer to habanalabs device structure
 * @freq: the new frequency value
 *
 * Change the frequency if needed. This function has no protection against
 * concurrency, therefore it is assumed that the calling function has protected
 * itself against the case of calling this function from multiple threads with
 * different values
 *
 * Returns 0 if no change was done, otherwise returns 1
 */
int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq)
{
        if ((hdev->pm_mng_profile == PM_MANUAL) ||
                        (hdev->curr_pll_profile == freq))
                return 0;

        dev_dbg(hdev->dev, "Changing device frequency to %s\n",
                freq == PLL_HIGH ? "high" : "low");

        hdev->asic_funcs->set_pll_profile(hdev, freq);

        hdev->curr_pll_profile = freq;

        return 1;
}

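/*
 * hl_device_set_debug_mode - enable/disable device debug mode
 *
 * @hdev: pointer to habanalabs device structure
 * @enable: true to enter debug mode, false to leave it
 *
 * Entering debug mode disables clock gating; leaving it halts CoreSight
 * and restores clock gating. Returns -EFAULT if the requested transition
 * doesn't match the current state.
 */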
int hl_device_set_debug_mode(struct hl_device *hdev, bool enable)
{
        int rc = 0;

        mutex_lock(&hdev->debug_lock);

        if (!enable) {
                if (!hdev->in_debug) {
                        dev_err(hdev->dev,
                                "Failed to disable debug mode because device was not in debug mode\n");
                        rc = -EFAULT;
                        goto out;
                }

                if (!hdev->hard_reset_pending)
                        hdev->asic_funcs->halt_coresight(hdev);

                hdev->in_debug = 0;

                if (!hdev->hard_reset_pending)
                        hdev->asic_funcs->set_clock_gating(hdev);

                goto out;
        }

        if (hdev->in_debug) {
                dev_err(hdev->dev,
                        "Failed to enable debug mode because device is already in debug mode\n");
                rc = -EFAULT;
                goto out;
        }

        hdev->asic_funcs->disable_clock_gating(hdev);
        hdev->in_debug = 1;

out:
        mutex_unlock(&hdev->debug_lock);

        return rc;
}

/*
 * hl_device_suspend - initiate device suspend
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Puts the H/W in the suspend state (all ASICs).
 * Returns 0 for success or an error on failure.
 * Called at driver suspend.
 */
int hl_device_suspend(struct hl_device *hdev)
{
        int rc;

        pci_save_state(hdev->pdev);

        /* Block future CS/VM/JOB completion operations */
        rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
        if (rc) {
                dev_err(hdev->dev, "Can't suspend while in reset\n");
                return -EIO;
        }

        /* This blocks all other stuff that is not blocked by in_reset */
        hdev->disabled = true;

        /*
         * Flush anyone that is inside the critical section of enqueue
         * jobs to the H/W
         */
        hdev->asic_funcs->hw_queues_lock(hdev);
        hdev->asic_funcs->hw_queues_unlock(hdev);

        /* Flush processes that are sending message to CPU */
        mutex_lock(&hdev->send_cpu_message_lock);
        mutex_unlock(&hdev->send_cpu_message_lock);

        rc = hdev->asic_funcs->suspend(hdev);
        if (rc)
                dev_err(hdev->dev,
                        "Failed to disable PCI access of device CPU\n");

        /* Shut down the device */
        pci_disable_device(hdev->pdev);
        pci_set_power_state(hdev->pdev, PCI_D3hot);

        return 0;
}

/*
 * hl_device_resume - initiate device resume
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Bring the H/W back to operating state (all ASICs).
 * Returns 0 for success or an error on failure.
 * Called at driver resume.
 */
int hl_device_resume(struct hl_device *hdev)
{
        int rc;

        pci_set_power_state(hdev->pdev, PCI_D0);
        pci_restore_state(hdev->pdev);
        rc = pci_enable_device_mem(hdev->pdev);
        if (rc) {
                dev_err(hdev->dev,
                        "Failed to enable PCI device in resume\n");
                return rc;
        }

        pci_set_master(hdev->pdev);

        rc = hdev->asic_funcs->resume(hdev);
        if (rc) {
                dev_err(hdev->dev, "Failed to resume device after suspend\n");
                goto disable_device;
        }

        hdev->disabled = false;
        atomic_set(&hdev->in_reset, 0);

        rc = hl_device_reset(hdev, HL_RESET_HARD);
        if (rc) {
                dev_err(hdev->dev, "Failed to reset device during resume\n");
                goto disable_device;
        }

        return 0;

disable_device:
        pci_clear_master(hdev->pdev);
        pci_disable_device(hdev->pdev);

        return rc;
}

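/*
 * device_kill_open_processes - force close all user processes
 *
 * @hdev: pointer to habanalabs device structure
 * @timeout: how many seconds to wait for the processes to exit,
 *           0 to use the default reset wait period
 *
 * Send SIGKILL to every process that holds the device open and wait for
 * them to exit. Returns 0 if the list of open processes is empty, -EBUSY
 * if processes are still alive and another trial is allowed, -ETIME if
 * the maximum number of trials was reached.
 */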
static int device_kill_open_processes(struct hl_device *hdev, u32 timeout)
{
        struct hl_fpriv *hpriv;
        struct task_struct *task = NULL;
        u32 pending_cnt;

        /* Give the user time to close the FD, and allow processes that are
         * inside hl_device_open to finish
         */
        if (!list_empty(&hdev->fpriv_list))
                ssleep(1);

        if (timeout) {
                pending_cnt = timeout;
        } else {
                if (hdev->process_kill_trial_cnt) {
                        /* Processes have been already killed */
                        pending_cnt = 1;
                        goto wait_for_processes;
                } else {
                        /* Wait a small period after process kill */
                        pending_cnt = HL_PENDING_RESET_PER_SEC;
                }
        }

        mutex_lock(&hdev->fpriv_list_lock);

        /* This section must be protected because we are dereferencing
         * pointers that are freed if the process exits
         */
        list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) {
                task = get_pid_task(hpriv->taskpid, PIDTYPE_PID);
                if (task) {
                        dev_info(hdev->dev, "Killing user process pid=%d\n",
                                task_pid_nr(task));
                        send_sig(SIGKILL, task, 1);
                        usleep_range(1000, 10000);

                        put_task_struct(task);
                }
        }

        mutex_unlock(&hdev->fpriv_list_lock);

        /*
         * We killed the open users, but that doesn't mean they are closed.
         * It could be that they are running a long cleanup phase in the
         * driver, e.g. MMU unmappings, or running another long teardown flow
         * even before our cleanup.
         * Therefore we need to wait again to make sure they are closed before
         * continuing with the reset.
         */

wait_for_processes:
        while ((!list_empty(&hdev->fpriv_list)) && (pending_cnt)) {
                dev_dbg(hdev->dev,
                        "Waiting for all unmap operations to finish before hard reset\n");

                pending_cnt--;

                ssleep(1);
        }

        /* All processes exited successfully */
        if (list_empty(&hdev->fpriv_list))
                return 0;

        /* Give up waiting for processes to exit */
        if (hdev->process_kill_trial_cnt == HL_PENDING_RESET_MAX_TRIALS)
                return -ETIME;

        hdev->process_kill_trial_cnt++;

        return -EBUSY;
}

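/*
 * device_disable_open_processes - mark the device as removed for users
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Clear the device pointer of every open file so that subsequent file
 * operations fail gracefully instead of touching freed device state.
 */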
static void device_disable_open_processes(struct hl_device *hdev)
{
        struct hl_fpriv *hpriv;

        mutex_lock(&hdev->fpriv_list_lock);
        list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node)
                hpriv->hdev = NULL;
        mutex_unlock(&hdev->fpriv_list_lock);
}

/*
 * hl_device_reset - reset the device
 *
 * @hdev: pointer to habanalabs device structure
 * @flags: reset flags.
 *
 * Block future CS and wait for pending CS to be enqueued
 * Call ASIC H/W fini
 * Flush all completions
 * Re-initialize all internal data structures
 * Call ASIC H/W init, late_init
 * Test queues
 * Enable device
 *
 * Returns 0 for success or an error on failure.
 */
int hl_device_reset(struct hl_device *hdev, u32 flags)
{
        u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
        bool hard_reset, from_hard_reset_thread, hard_instead_soft = false;
        int i, rc;

        if (!hdev->init_done) {
                dev_err(hdev->dev,
                        "Can't reset before initialization is done\n");
                return 0;
        }

        hard_reset = (flags & HL_RESET_HARD) != 0;
        from_hard_reset_thread = (flags & HL_RESET_FROM_RESET_THREAD) != 0;

        if (!hard_reset && !hdev->supports_soft_reset) {
                hard_instead_soft = true;
                hard_reset = true;
        }

        if (hdev->reset_upon_device_release &&
                        (flags & HL_RESET_DEVICE_RELEASE)) {
                dev_dbg(hdev->dev,
                        "Perform %s-reset upon device release\n",
                        hard_reset ? "hard" : "soft");
                goto do_reset;
        }

        if (!hard_reset && !hdev->allow_external_soft_reset) {
                hard_instead_soft = true;
                hard_reset = true;
        }

        if (hard_instead_soft)
                dev_dbg(hdev->dev, "Doing hard-reset instead of soft-reset\n");

do_reset:
        /* Re-entry of reset thread */
        if (from_hard_reset_thread && hdev->process_kill_trial_cnt)
                goto kill_processes;

        /*
         * Prevent concurrency in this function - only one reset should be
         * done at any given time. Only need to perform this if we didn't
         * get here from the dedicated hard reset thread
         */
        if (!from_hard_reset_thread) {
                /* Block future CS/VM/JOB completion operations */
                rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
                if (rc)
                        return 0;

                /*
                 * 'reset cause' is updated here because we can only reach
                 * this point once per reset ('in_reset' makes sure of it).
                 * This guarantees that 'reset_cause' keeps holding the first
                 * recorded reason for the reset.
                 */
                if (flags & HL_RESET_HEARTBEAT)
                        hdev->curr_reset_cause = HL_RESET_CAUSE_HEARTBEAT;
                else if (flags & HL_RESET_TDR)
                        hdev->curr_reset_cause = HL_RESET_CAUSE_TDR;
                else
                        hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;

                /*
                 * If the reset is due to heartbeat, the device CPU is not
                 * responsive, in which case there is no point sending a PCI
                 * disable message to it
                 */
                if (hard_reset && !(flags & HL_RESET_HEARTBEAT)) {
                        /* Disable PCI access from device F/W so it won't send
                         * us additional interrupts. We disable MSI/MSI-X at
                         * the halt_engines function and we can't have the F/W
                         * sending us interrupts after that. We need to disable
                         * the access here because if the device is marked
                         * disabled, the message won't be sent. Also, in case
                         * of heartbeat, the device CPU is marked as disabled
                         * so this message won't be sent
                         */
                        if (hl_fw_send_pci_access_msg(hdev,
                                        CPUCP_PACKET_DISABLE_PCI_ACCESS))
                                dev_warn(hdev->dev,
                                        "Failed to disable PCI access by F/W\n");
                }

                /* This also blocks future CS/VM/JOB completion operations */
                hdev->disabled = true;

                /* Flush anyone that is inside the critical section of enqueue
                 * jobs to the H/W
                 */
                hdev->asic_funcs->hw_queues_lock(hdev);
                hdev->asic_funcs->hw_queues_unlock(hdev);

                /* Flush anyone that is inside device open */
                mutex_lock(&hdev->fpriv_list_lock);
                mutex_unlock(&hdev->fpriv_list_lock);

                dev_err(hdev->dev, "Going to RESET device!\n");
        }

again:
        if ((hard_reset) && (!from_hard_reset_thread)) {
                hdev->hard_reset_pending = true;

                hdev->process_kill_trial_cnt = 0;

                /*
                 * Because the reset function can't run from heartbeat work,
                 * we need to call the reset function from a dedicated work.
                 */
                queue_delayed_work(hdev->device_reset_work.wq,
                        &hdev->device_reset_work.reset_work, 0);

                return 0;
        }

        if (hard_reset) {
                device_late_fini(hdev);

                /*
                 * Now that the heartbeat thread is closed, flush processes
                 * which are sending messages to CPU
                 */
                mutex_lock(&hdev->send_cpu_message_lock);
                mutex_unlock(&hdev->send_cpu_message_lock);
        }

        /*
         * Halt the engines and disable interrupts so we won't get any more
         * completions from H/W and we won't have any accesses from the
         * H/W to the host machine
         */
        hdev->asic_funcs->halt_engines(hdev, hard_reset);

        /* Go over all the queues, release all CS and their jobs */
        hl_cs_rollback_all(hdev);

        /* Release all pending user interrupts, each pending user interrupt
         * holds a reference to user context
         */
        hl_release_pending_user_interrupts(hdev);

kill_processes:
        if (hard_reset) {
                /* Kill processes here after CS rollback. This is because the
                 * process can't really exit until all its CSs are done, which
                 * is what we do in cs rollback
                 */
                rc = device_kill_open_processes(hdev, 0);

                if (rc == -EBUSY) {
                        if (hdev->device_fini_pending) {
                                dev_crit(hdev->dev,
                                        "Failed to kill all open processes, stopping hard reset\n");
                                goto out_err;
                        }

                        /* signal reset thread to reschedule */
                        return rc;
                }

                if (rc) {
                        dev_crit(hdev->dev,
                                "Failed to kill all open processes, stopping hard reset\n");
                        goto out_err;
                }

                /* Flush the Event queue workers to make sure no other thread is
                 * reading or writing to registers during the reset
                 */
                flush_workqueue(hdev->eq_wq);
        }

        /* Reset the H/W. It will be in idle state after this returns */
        hdev->asic_funcs->hw_fini(hdev, hard_reset);

        if (hard_reset) {
                /* Release kernel context */
                if (hdev->kernel_ctx && hl_ctx_put(hdev->kernel_ctx) == 1)
                        hdev->kernel_ctx = NULL;
                hl_vm_fini(hdev);
                hl_mmu_fini(hdev);
                hl_eq_reset(hdev, &hdev->event_queue);
        }

        /* Re-initialize PI,CI to 0 in all queues (hw queue, cq) */
        hl_hw_queue_reset(hdev, hard_reset);
        for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
                hl_cq_reset(hdev, &hdev->completion_queue[i]);

        mutex_lock(&hdev->fpriv_list_lock);

        /* Make sure the context switch phase will run again */
        if (hdev->compute_ctx) {
                atomic_set(&hdev->compute_ctx->thread_ctx_switch_token, 1);
                hdev->compute_ctx->thread_ctx_switch_wait_token = 0;
        }

        mutex_unlock(&hdev->fpriv_list_lock);

        /* Finished tear-down, starting to re-initialize */

        if (hard_reset) {
                hdev->device_cpu_disabled = false;
                hdev->hard_reset_pending = false;

                if (hdev->kernel_ctx) {
                        dev_crit(hdev->dev,
                                "kernel ctx was alive during hard reset, something is terribly wrong\n");
                        rc = -EBUSY;
                        goto out_err;
                }

                rc = hl_mmu_init(hdev);
                if (rc) {
                        dev_err(hdev->dev,
                                "Failed to initialize MMU S/W after hard reset\n");
                        goto out_err;
                }

                /* Allocate the kernel context */
                hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx),
                                                GFP_KERNEL);
                if (!hdev->kernel_ctx) {
                        rc = -ENOMEM;
                        hl_mmu_fini(hdev);
                        goto out_err;
                }

                hdev->compute_ctx = NULL;

                rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
                if (rc) {
                        dev_err(hdev->dev,
                                "failed to init kernel ctx in hard reset\n");
                        kfree(hdev->kernel_ctx);
                        hdev->kernel_ctx = NULL;
                        hl_mmu_fini(hdev);
                        goto out_err;
                }
        }

        /* The device is now enabled because part of the initialization
         * requires communication with the device firmware to get
         * information that is needed for the initialization itself
         */
        hdev->disabled = false;

        rc = hdev->asic_funcs->hw_init(hdev);
        if (rc) {
                dev_err(hdev->dev,
                        "failed to initialize the H/W after reset\n");
                goto out_err;
        }

        /* If device is not idle fail the reset process */
        if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask,
                        HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) {
                dev_err(hdev->dev,
                        "device is not idle (mask 0x%llx_%llx) after reset\n",
                        idle_mask[1], idle_mask[0]);
                rc = -EIO;
                goto out_err;
        }

        /* Check that the communication with the device is working */
        rc = hdev->asic_funcs->test_queues(hdev);
        if (rc) {
                dev_err(hdev->dev,
                        "Failed to detect if device is alive after reset\n");
                goto out_err;
        }

        if (hard_reset) {
                rc = device_late_init(hdev);
                if (rc) {
                        dev_err(hdev->dev,
                                "Failed late init after hard reset\n");
                        goto out_err;
                }

                rc = hl_vm_init(hdev);
                if (rc) {
                        dev_err(hdev->dev,
                                "Failed to init memory module after hard reset\n");
                        goto out_err;
                }

                hl_set_max_power(hdev);
        } else {
                rc = hdev->asic_funcs->soft_reset_late_init(hdev);
                if (rc) {
                        dev_err(hdev->dev,
                                "Failed late init after soft reset\n");
                        goto out_err;
                }
        }

        atomic_set(&hdev->in_reset, 0);
        hdev->needs_reset = false;

        dev_notice(hdev->dev, "Successfully finished resetting the device\n");

        if (hard_reset) {
                hdev->hard_reset_cnt++;

                /* After reset is done, we are ready to receive events from
                 * the F/W. We can't do it before because we will ignore events
                 * and if those events are fatal, we won't know about it and
                 * the device will be operational although it shouldn't be
                 */
                hdev->asic_funcs->enable_events_from_fw(hdev);
        } else {
                hdev->soft_reset_cnt++;
        }

        return 0;

out_err:
        hdev->disabled = true;

        if (hard_reset) {
                dev_err(hdev->dev,
                        "Failed to reset! Device is NOT usable\n");
                hdev->hard_reset_cnt++;
        } else {
                dev_err(hdev->dev,
                        "Failed to do soft-reset, trying hard reset\n");
                hdev->soft_reset_cnt++;
                hard_reset = true;
                goto again;
        }

        atomic_set(&hdev->in_reset, 0);

        return rc;
}

/*
 * hl_device_init - main initialization function for habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 * @hclass: pointer to the class object of the device
 *
 * Allocate an id for the device, do early initialization and then call the
 * ASIC specific initialization functions. Finally, create the cdev and the
 * Linux device to expose it to the user
 */
int hl_device_init(struct hl_device *hdev, struct class *hclass)
{
        int i, rc, cq_cnt, user_interrupt_cnt, cq_ready_cnt;
        char *name;
        bool add_cdev_sysfs_on_err = false;

        name = kasprintf(GFP_KERNEL, "hl%d", hdev->id / 2);
        if (!name) {
                rc = -ENOMEM;
                goto out_disabled;
        }

        /* Initialize cdev and device structures */
        rc = device_init_cdev(hdev, hclass, hdev->id, &hl_ops, name,
                                &hdev->cdev, &hdev->dev);

        kfree(name);

        if (rc)
                goto out_disabled;

        name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->id / 2);
        if (!name) {
                rc = -ENOMEM;
                goto free_dev;
        }

        /* Initialize cdev and device structures for control device */
        rc = device_init_cdev(hdev, hclass, hdev->id_control, &hl_ctrl_ops,
                                name, &hdev->cdev_ctrl, &hdev->dev_ctrl);

        kfree(name);

        if (rc)
                goto free_dev;

        /* Initialize ASIC function pointers and perform early init */
        rc = device_early_init(hdev);
        if (rc)
                goto free_dev_ctrl;

        user_interrupt_cnt = hdev->asic_prop.user_interrupt_count;

        if (user_interrupt_cnt) {
                hdev->user_interrupt = kcalloc(user_interrupt_cnt,
                                sizeof(*hdev->user_interrupt),
                                GFP_KERNEL);

                if (!hdev->user_interrupt) {
                        rc = -ENOMEM;
                        goto early_fini;
                }
        }

        /*
         * Start calling ASIC initialization. First S/W then H/W and finally
         * late init
         */
        rc = hdev->asic_funcs->sw_init(hdev);
        if (rc)
                goto user_interrupts_fini;

        /*
         * Initialize the H/W queues. Must be done before hw_init, because
         * there the addresses of the kernel queue are being written to the
         * registers of the device
         */
        rc = hl_hw_queues_create(hdev);
        if (rc) {
                dev_err(hdev->dev, "failed to initialize kernel queues\n");
                goto sw_fini;
        }

        cq_cnt = hdev->asic_prop.completion_queues_count;

        /*
         * Initialize the completion queues. Must be done before hw_init,
         * because there the addresses of the completion queues are being
         * passed as arguments to request_irq
         */
        if (cq_cnt) {
                hdev->completion_queue = kcalloc(cq_cnt,
                                sizeof(*hdev->completion_queue),
                                GFP_KERNEL);

                if (!hdev->completion_queue) {
                        dev_err(hdev->dev,
                                "failed to allocate completion queues\n");
                        rc = -ENOMEM;
                        goto hw_queues_destroy;
                }
        }

        for (i = 0, cq_ready_cnt = 0 ; i < cq_cnt ; i++, cq_ready_cnt++) {
                rc = hl_cq_init(hdev, &hdev->completion_queue[i],
                                hdev->asic_funcs->get_queue_id_for_cq(hdev, i));
                if (rc) {
                        dev_err(hdev->dev,
                                "failed to initialize completion queue\n");
                        goto cq_fini;
                }
                hdev->completion_queue[i].cq_idx = i;
        }

        /*
         * Initialize the event queue. Must be done before hw_init,
         * because there the address of the event queue is being
         * passed as argument to request_irq
         */
        rc = hl_eq_init(hdev, &hdev->event_queue);
        if (rc) {
                dev_err(hdev->dev, "failed to initialize event queue\n");
                goto cq_fini;
        }

        /* MMU S/W must be initialized before kernel context is created */
        rc = hl_mmu_init(hdev);
        if (rc) {
                dev_err(hdev->dev, "Failed to initialize MMU S/W structures\n");
                goto eq_fini;
        }

        /* Allocate the kernel context */
        hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL);
        if (!hdev->kernel_ctx) {
                rc = -ENOMEM;
                goto mmu_fini;
        }

        hdev->compute_ctx = NULL;

        hl_debugfs_add_device(hdev);

        /* debugfs nodes are created in hl_ctx_init so it must be called after
         * hl_debugfs_add_device.
         */
        rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
        if (rc) {
                dev_err(hdev->dev, "failed to initialize kernel context\n");
                kfree(hdev->kernel_ctx);
                goto remove_device_from_debugfs;
        }

        rc = hl_cb_pool_init(hdev);
        if (rc) {
                dev_err(hdev->dev, "failed to initialize CB pool\n");
                goto release_ctx;
        }

        /*
         * From this point, override rc (=0) in case of an error to allow
         * debugging (by adding char devices and creating sysfs nodes as part
         * of the error flow).
         */
        add_cdev_sysfs_on_err = true;

        /* The device is now enabled because part of the initialization
         * requires communication with the device firmware to get
         * information that is needed for the initialization itself
         */
        hdev->disabled = false;

        rc = hdev->asic_funcs->hw_init(hdev);
        if (rc) {
                dev_err(hdev->dev, "failed to initialize the H/W\n");
                rc = 0;
                goto out_disabled;
        }

        /* Check that the communication with the device is working */
        rc = hdev->asic_funcs->test_queues(hdev);
        if (rc) {
                dev_err(hdev->dev, "Failed to detect if device is alive\n");
                rc = 0;
                goto out_disabled;
        }

        rc = device_late_init(hdev);
        if (rc) {
                dev_err(hdev->dev, "Failed late initialization\n");
                rc = 0;
                goto out_disabled;
        }

        dev_info(hdev->dev, "Found %s device with %lluGB DRAM\n",
                hdev->asic_name,
                hdev->asic_prop.dram_size / SZ_1G);

        rc = hl_vm_init(hdev);
        if (rc) {
                dev_err(hdev->dev, "Failed to initialize memory module\n");
                rc = 0;
                goto out_disabled;
        }

        /*
         * Expose devices and sysfs nodes to user.
         * From here there is no need to add char devices and create sysfs nodes
         * in case of an error.
         */
        add_cdev_sysfs_on_err = false;
        rc = device_cdev_sysfs_add(hdev);
        if (rc) {
                dev_err(hdev->dev,
                        "Failed to add char devices and sysfs nodes\n");
                rc = 0;
                goto out_disabled;
        }

        /* Need to call this again because the max power might change,
         * depending on card type for certain ASICs
         */
        hl_set_max_power(hdev);

        /*
         * hl_hwmon_init() must be called after device_late_init(), because only
         * there we get the information from the device about which
         * hwmon-related sensors the device supports.
         * Furthermore, it must be done after adding the device to the system.
         */
        rc = hl_hwmon_init(hdev);
        if (rc) {
                dev_err(hdev->dev, "Failed to initialize hwmon\n");
                rc = 0;
                goto out_disabled;
        }

        dev_notice(hdev->dev,
                "Successfully added device to habanalabs driver\n");

        hdev->init_done = true;

        /* After initialization is done, we are ready to receive events from
         * the F/W. We can't do it before because we will ignore events and if
         * those events are fatal, we won't know about it and the device will
         * be operational although it shouldn't be
         */
        hdev->asic_funcs->enable_events_from_fw(hdev);

        return 0;

release_ctx:
        if (hl_ctx_put(hdev->kernel_ctx) != 1)
                dev_err(hdev->dev,
                        "kernel ctx is still alive on initialization failure\n");
remove_device_from_debugfs:
        hl_debugfs_remove_device(hdev);
mmu_fini:
        hl_mmu_fini(hdev);
eq_fini:
        hl_eq_fini(hdev, &hdev->event_queue);
cq_fini:
        for (i = 0 ; i < cq_ready_cnt ; i++)
                hl_cq_fini(hdev, &hdev->completion_queue[i]);
        kfree(hdev->completion_queue);
hw_queues_destroy:
        hl_hw_queues_destroy(hdev);
sw_fini:
        hdev->asic_funcs->sw_fini(hdev);
user_interrupts_fini:
        kfree(hdev->user_interrupt);
early_fini:
        device_early_fini(hdev);
free_dev_ctrl:
        put_device(hdev->dev_ctrl);
free_dev:
        put_device(hdev->dev);
out_disabled:
        hdev->disabled = true;
        if (add_cdev_sysfs_on_err)
                device_cdev_sysfs_add(hdev);
        if (hdev->pdev)
                dev_err(&hdev->pdev->dev,
                        "Failed to initialize hl%d. Device is NOT usable!\n",
                        hdev->id / 2);
        else
                pr_err("Failed to initialize hl%d. Device is NOT usable!\n",
                        hdev->id / 2);

        return rc;
}

/*
 * hl_device_fini - main tear-down function for habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Destroy the device, call ASIC fini functions and release the id
 */
void hl_device_fini(struct hl_device *hdev)
{
        ktime_t timeout;
        u64 reset_sec;
        int i, rc;

        dev_info(hdev->dev, "Removing device\n");

        hdev->device_fini_pending = 1;
        flush_delayed_work(&hdev->device_reset_work.reset_work);

        if (hdev->pldm)
                reset_sec = HL_PLDM_HARD_RESET_MAX_TIMEOUT;
        else
                reset_sec = HL_HARD_RESET_MAX_TIMEOUT;

        /*
         * This function is competing with the reset function, so try to
         * take the reset atomic and if we are already in the middle of a
         * reset, wait until the reset function is finished. The reset
         * function is designed to always finish. However, in Gaudi, because
         * of all the network ports, the hard reset could take between
         * 10-30 seconds
         */
        timeout = ktime_add_us(ktime_get(), reset_sec * 1000 * 1000);
        rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
        while (rc) {
                usleep_range(50, 200);
                rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
                if (ktime_compare(ktime_get(), timeout) > 0) {
                        dev_crit(hdev->dev,
                                "Failed to remove device because reset function did not finish\n");
                        return;
                }
        }

        /* Disable PCI access from device F/W so it won't send us additional
         * interrupts. We disable MSI/MSI-X at the halt_engines function and we
         * can't have the F/W sending us interrupts after that. We need to
         * disable the access here because if the device is marked disabled,
         * the message won't be sent. Also, in case of heartbeat, the device
         * CPU is marked as disabled so this message won't be sent
         */
        hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);

        /* Mark device as disabled */
        hdev->disabled = true;

        /* Flush anyone that is inside the critical section of enqueue
         * jobs to the H/W
         */
        hdev->asic_funcs->hw_queues_lock(hdev);
        hdev->asic_funcs->hw_queues_unlock(hdev);

        /* Flush anyone that is inside device open */
        mutex_lock(&hdev->fpriv_list_lock);
        mutex_unlock(&hdev->fpriv_list_lock);

        hdev->hard_reset_pending = true;

        hl_hwmon_fini(hdev);

        device_late_fini(hdev);

        /*
         * Halt the engines and disable interrupts so we won't get any more
         * completions from H/W and we won't have any accesses from the
         * H/W to the host machine
         */
        hdev->asic_funcs->halt_engines(hdev, true);

        /* Go over all the queues, release all CS and their jobs */
        hl_cs_rollback_all(hdev);

        /* Kill processes here after CS rollback. This is because the process
         * can't really exit until all its CSs are done, which is what we
         * do in cs rollback
         */
        dev_info(hdev->dev,
                "Waiting for all processes to exit (timeout of %u seconds)",
                HL_PENDING_RESET_LONG_SEC);

        rc = device_kill_open_processes(hdev, HL_PENDING_RESET_LONG_SEC);
        if (rc) {
                dev_crit(hdev->dev, "Failed to kill all open processes\n");
                device_disable_open_processes(hdev);
        }

        hl_cb_pool_fini(hdev);

        /* Reset the H/W. It will be in idle state after this returns */
        hdev->asic_funcs->hw_fini(hdev, true);

        /* Release kernel context */
        if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
                dev_err(hdev->dev, "kernel ctx is still alive\n");

        hl_debugfs_remove_device(hdev);

        hl_vm_fini(hdev);

        hl_mmu_fini(hdev);

        hl_eq_fini(hdev, &hdev->event_queue);

        for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
                hl_cq_fini(hdev, &hdev->completion_queue[i]);
        kfree(hdev->completion_queue);
        kfree(hdev->user_interrupt);

        hl_hw_queues_destroy(hdev);

        /* Call ASIC S/W finalize function */
        hdev->asic_funcs->sw_fini(hdev);

        device_early_fini(hdev);

        /* Hide devices and sysfs nodes from user */
        device_cdev_sysfs_del(hdev);

        pr_info("removed device successfully\n");
}

/*
 * MMIO register access helper functions.
 */

/*
 * hl_rreg - Read an MMIO register
 *
 * @hdev: pointer to habanalabs device structure
 * @reg: MMIO register offset (in bytes)
 *
 * Returns the value of the MMIO register we are asked to read
 */
inline u32 hl_rreg(struct hl_device *hdev, u32 reg)
{
        return readl(hdev->rmmio + reg);
}

/*
 * hl_wreg - Write to an MMIO register
 *
 * @hdev: pointer to habanalabs device structure
 * @reg: MMIO register offset (in bytes)
 * @val: 32-bit value
 *
 * Writes the 32-bit value into the MMIO register
 */
inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val)
{
        writel(val, hdev->rmmio + reg);
}