linux/drivers/misc/habanalabs/goya/goya.c
// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "goyaP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_0.h"
#include "../include/goya/asic_reg/goya_masks.h"
#include "../include/goya/goya_reg_map.h"

#include <linux/pci.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
 * GOYA security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers (when the MMU is enabled, the DMA RR does NOT
 *          protect the host)
 *        - MMU
 *
 * 2. DRAM is protected by:
 *        - Range registers (protect the first 512MB)
 *        - MMU (isolation between users)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * When the MMU is disabled:
 *
 * QMAN DMA: PQ, CQ, CP and DMA are secured.
 * PQ, CB and the data are on the host.
 *
 * QMAN TPC/MME:
 * PQ, CQ and CP are not secured.
 * PQ, CB and the data are on the SRAM/DRAM.
 *
 * Since QMAN DMA is secured, the driver parses the DMA CB:
 *     - checks the DMA pointers
 *     - WREG, MSG_PROT are not allowed.
 *     - MSG_LONG/SHORT are allowed.
 *
 * A read/write transaction by the QMAN to a protected area will succeed if
 * and only if the QMAN's CP is secured and MSG_PROT is used.
 *
 *
 * When the MMU is enabled:
 *
 * QMAN DMA: PQ, CQ and CP are secured.
 * The MMU is set to bypass on the Secure props register of the QMAN.
 * The reasons we don't enable the MMU for PQ, CQ and CP are:
 *     - The PQ entry is in kernel address space and the driver doesn't map it.
 *     - CP writes to the MSIX register and to kernel address space (completion
 *       queue).
 *
 * DMA is not secured, but because CP is secured, the driver still needs to
 * parse the CB; it just doesn't need to check the DMA addresses.
 *
 * For QMAN DMA 0, DMA is also secured because only the driver uses this DMA
 * and the driver doesn't map memory in the MMU.
 *
 * QMAN TPC/MME: PQ, CQ and CP aren't secured (no change from MMU disabled
 * mode).
 *
 * The DMA RR does NOT protect the host because DMA is not secured.
 *
 */
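
/*
 * Illustrative sketch only (not part of the driver): per the scheme above,
 * when the MMU is disabled the secured DMA QMAN CB parser must reject
 * privileged packets such as WREG_32 and MSG_PROT while letting
 * MSG_LONG/MSG_SHORT through. A minimal filter along those lines could look
 * like this (the helper name is hypothetical):
 */
static inline bool goya_cb_pkt_allowed_sketch(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:
	case PACKET_MSG_PROT:
		return false;	/* privileged packets are rejected */
	default:
		return true;	/* MSG_LONG/SHORT and the rest pass through */
	}
}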

#define GOYA_BOOT_FIT_FILE	"habanalabs/goya/goya-boot-fit.itb"
#define GOYA_LINUX_FW_FILE	"habanalabs/goya/goya-fit.itb"

#define GOYA_MMU_REGS_NUM		63

#define GOYA_DMA_POOL_BLK_SIZE		0x100		/* 256 bytes */

#define GOYA_RESET_TIMEOUT_MSEC		500		/* 500ms */
#define GOYA_PLDM_RESET_TIMEOUT_MSEC	20000		/* 20s */
#define GOYA_RESET_WAIT_MSEC		1		/* 1ms */
#define GOYA_CPU_RESET_WAIT_MSEC	100		/* 100ms */
#define GOYA_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GOYA_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */
#define GOYA_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GOYA_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GOYA_BOOT_FIT_REQ_TIMEOUT_USEC	1000000		/* 1s */
#define GOYA_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */

#define GOYA_QMAN0_FENCE_VAL		0xD169B243

#define GOYA_MAX_STRING_LEN		20

#define GOYA_CB_POOL_CB_CNT		512
#define GOYA_CB_POOL_CB_SIZE		0x20000		/* 128KB */

#define IS_QM_IDLE(engine, qm_glbl_sts0) \
	(((qm_glbl_sts0) & engine##_QM_IDLE_MASK) == engine##_QM_IDLE_MASK)
#define IS_DMA_QM_IDLE(qm_glbl_sts0)	IS_QM_IDLE(DMA, qm_glbl_sts0)
#define IS_TPC_QM_IDLE(qm_glbl_sts0)	IS_QM_IDLE(TPC, qm_glbl_sts0)
#define IS_MME_QM_IDLE(qm_glbl_sts0)	IS_QM_IDLE(MME, qm_glbl_sts0)

#define IS_CMDQ_IDLE(engine, cmdq_glbl_sts0) \
	(((cmdq_glbl_sts0) & engine##_CMDQ_IDLE_MASK) == \
			engine##_CMDQ_IDLE_MASK)
#define IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) \
	IS_CMDQ_IDLE(TPC, cmdq_glbl_sts0)
#define IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) \
	IS_CMDQ_IDLE(MME, cmdq_glbl_sts0)

#define IS_DMA_IDLE(dma_core_sts0) \
	!((dma_core_sts0) & DMA_CH_0_STS0_DMA_BUSY_MASK)

#define IS_TPC_IDLE(tpc_cfg_sts) \
	(((tpc_cfg_sts) & TPC_CFG_IDLE_MASK) == TPC_CFG_IDLE_MASK)

#define IS_MME_IDLE(mme_arch_sts) \
	(((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)

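/*
 * Illustrative helper (not used by the driver): the idle masks above are
 * meant to be combined, e.g. a DMA engine counts as idle only when both its
 * QMAN and its channel core report idle. The helper name is hypothetical.
 */
static inline bool goya_dma_engine_idle_sketch(u32 qm_glbl_sts0,
						u32 dma_core_sts0)
{
	return IS_DMA_QM_IDLE(qm_glbl_sts0) && IS_DMA_IDLE(dma_core_sts0);
}
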
static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = {
		"goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3",
		"goya cq 4", "goya cpu eq"
};

static u16 goya_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop)
};

static inline bool validate_packet_id(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_CP_DMA:
	case PACKET_MSG_PROT:
	case PACKET_FENCE:
	case PACKET_LIN_DMA:
	case PACKET_NOP:
	case PACKET_STOP:
		return true;
	default:
		return false;
	}
}
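
/*
 * Note: callers are expected to run validate_packet_id() on a user-provided
 * packet ID before using it to index goya_packet_sizes[], so a bogus ID never
 * turns into an out-of-bounds array access.
 */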

static u64 goya_mmu_regs[GOYA_MMU_REGS_NUM] = {
	mmDMA_QM_0_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_1_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_2_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_3_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_4_GLBL_NON_SECURE_PROPS,
	mmTPC0_QM_GLBL_SECURE_PROPS,
	mmTPC0_QM_GLBL_NON_SECURE_PROPS,
	mmTPC0_CMDQ_GLBL_SECURE_PROPS,
	mmTPC0_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC0_CFG_ARUSER,
	mmTPC0_CFG_AWUSER,
	mmTPC1_QM_GLBL_SECURE_PROPS,
	mmTPC1_QM_GLBL_NON_SECURE_PROPS,
	mmTPC1_CMDQ_GLBL_SECURE_PROPS,
	mmTPC1_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC1_CFG_ARUSER,
	mmTPC1_CFG_AWUSER,
	mmTPC2_QM_GLBL_SECURE_PROPS,
	mmTPC2_QM_GLBL_NON_SECURE_PROPS,
	mmTPC2_CMDQ_GLBL_SECURE_PROPS,
	mmTPC2_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC2_CFG_ARUSER,
	mmTPC2_CFG_AWUSER,
	mmTPC3_QM_GLBL_SECURE_PROPS,
	mmTPC3_QM_GLBL_NON_SECURE_PROPS,
	mmTPC3_CMDQ_GLBL_SECURE_PROPS,
	mmTPC3_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC3_CFG_ARUSER,
	mmTPC3_CFG_AWUSER,
	mmTPC4_QM_GLBL_SECURE_PROPS,
	mmTPC4_QM_GLBL_NON_SECURE_PROPS,
	mmTPC4_CMDQ_GLBL_SECURE_PROPS,
	mmTPC4_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC4_CFG_ARUSER,
	mmTPC4_CFG_AWUSER,
	mmTPC5_QM_GLBL_SECURE_PROPS,
	mmTPC5_QM_GLBL_NON_SECURE_PROPS,
	mmTPC5_CMDQ_GLBL_SECURE_PROPS,
	mmTPC5_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC5_CFG_ARUSER,
	mmTPC5_CFG_AWUSER,
	mmTPC6_QM_GLBL_SECURE_PROPS,
	mmTPC6_QM_GLBL_NON_SECURE_PROPS,
	mmTPC6_CMDQ_GLBL_SECURE_PROPS,
	mmTPC6_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC6_CFG_ARUSER,
	mmTPC6_CFG_AWUSER,
	mmTPC7_QM_GLBL_SECURE_PROPS,
	mmTPC7_QM_GLBL_NON_SECURE_PROPS,
	mmTPC7_CMDQ_GLBL_SECURE_PROPS,
	mmTPC7_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC7_CFG_ARUSER,
	mmTPC7_CFG_AWUSER,
	mmMME_QM_GLBL_SECURE_PROPS,
	mmMME_QM_GLBL_NON_SECURE_PROPS,
	mmMME_CMDQ_GLBL_SECURE_PROPS,
	mmMME_CMDQ_GLBL_NON_SECURE_PROPS,
	mmMME_SBA_CONTROL_DATA,
	mmMME_SBB_CONTROL_DATA,
	mmMME_SBC_CONTROL_DATA,
	mmMME_WBC_CONTROL_DATA,
	mmPCIE_WRAP_PSOC_ARUSER,
	mmPCIE_WRAP_PSOC_AWUSER
};

static u32 goya_all_events[] = {
	GOYA_ASYNC_EVENT_ID_PCIE_IF,
	GOYA_ASYNC_EVENT_ID_TPC0_ECC,
	GOYA_ASYNC_EVENT_ID_TPC1_ECC,
	GOYA_ASYNC_EVENT_ID_TPC2_ECC,
	GOYA_ASYNC_EVENT_ID_TPC3_ECC,
	GOYA_ASYNC_EVENT_ID_TPC4_ECC,
	GOYA_ASYNC_EVENT_ID_TPC5_ECC,
	GOYA_ASYNC_EVENT_ID_TPC6_ECC,
	GOYA_ASYNC_EVENT_ID_TPC7_ECC,
	GOYA_ASYNC_EVENT_ID_MME_ECC,
	GOYA_ASYNC_EVENT_ID_MME_ECC_EXT,
	GOYA_ASYNC_EVENT_ID_MMU_ECC,
	GOYA_ASYNC_EVENT_ID_DMA_MACRO,
	GOYA_ASYNC_EVENT_ID_DMA_ECC,
	GOYA_ASYNC_EVENT_ID_CPU_IF_ECC,
	GOYA_ASYNC_EVENT_ID_PSOC_MEM,
	GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT,
	GOYA_ASYNC_EVENT_ID_SRAM0,
	GOYA_ASYNC_EVENT_ID_SRAM1,
	GOYA_ASYNC_EVENT_ID_SRAM2,
	GOYA_ASYNC_EVENT_ID_SRAM3,
	GOYA_ASYNC_EVENT_ID_SRAM4,
	GOYA_ASYNC_EVENT_ID_SRAM5,
	GOYA_ASYNC_EVENT_ID_SRAM6,
	GOYA_ASYNC_EVENT_ID_SRAM7,
	GOYA_ASYNC_EVENT_ID_SRAM8,
	GOYA_ASYNC_EVENT_ID_SRAM9,
	GOYA_ASYNC_EVENT_ID_SRAM10,
	GOYA_ASYNC_EVENT_ID_SRAM11,
	GOYA_ASYNC_EVENT_ID_SRAM12,
	GOYA_ASYNC_EVENT_ID_SRAM13,
	GOYA_ASYNC_EVENT_ID_SRAM14,
	GOYA_ASYNC_EVENT_ID_SRAM15,
	GOYA_ASYNC_EVENT_ID_SRAM16,
	GOYA_ASYNC_EVENT_ID_SRAM17,
	GOYA_ASYNC_EVENT_ID_SRAM18,
	GOYA_ASYNC_EVENT_ID_SRAM19,
	GOYA_ASYNC_EVENT_ID_SRAM20,
	GOYA_ASYNC_EVENT_ID_SRAM21,
	GOYA_ASYNC_EVENT_ID_SRAM22,
	GOYA_ASYNC_EVENT_ID_SRAM23,
	GOYA_ASYNC_EVENT_ID_SRAM24,
	GOYA_ASYNC_EVENT_ID_SRAM25,
	GOYA_ASYNC_EVENT_ID_SRAM26,
	GOYA_ASYNC_EVENT_ID_SRAM27,
	GOYA_ASYNC_EVENT_ID_SRAM28,
	GOYA_ASYNC_EVENT_ID_SRAM29,
	GOYA_ASYNC_EVENT_ID_GIC500,
	GOYA_ASYNC_EVENT_ID_PLL0,
	GOYA_ASYNC_EVENT_ID_PLL1,
	GOYA_ASYNC_EVENT_ID_PLL3,
	GOYA_ASYNC_EVENT_ID_PLL4,
	GOYA_ASYNC_EVENT_ID_PLL5,
	GOYA_ASYNC_EVENT_ID_PLL6,
	GOYA_ASYNC_EVENT_ID_AXI_ECC,
	GOYA_ASYNC_EVENT_ID_L2_RAM_ECC,
	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET,
	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT,
	GOYA_ASYNC_EVENT_ID_PCIE_DEC,
	GOYA_ASYNC_EVENT_ID_TPC0_DEC,
	GOYA_ASYNC_EVENT_ID_TPC1_DEC,
	GOYA_ASYNC_EVENT_ID_TPC2_DEC,
	GOYA_ASYNC_EVENT_ID_TPC3_DEC,
	GOYA_ASYNC_EVENT_ID_TPC4_DEC,
	GOYA_ASYNC_EVENT_ID_TPC5_DEC,
	GOYA_ASYNC_EVENT_ID_TPC6_DEC,
	GOYA_ASYNC_EVENT_ID_TPC7_DEC,
	GOYA_ASYNC_EVENT_ID_MME_WACS,
	GOYA_ASYNC_EVENT_ID_MME_WACSD,
	GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER,
	GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC,
	GOYA_ASYNC_EVENT_ID_PSOC,
	GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC0_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC1_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC2_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC3_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC4_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC5_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC6_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC7_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC0_QM,
	GOYA_ASYNC_EVENT_ID_TPC1_QM,
	GOYA_ASYNC_EVENT_ID_TPC2_QM,
	GOYA_ASYNC_EVENT_ID_TPC3_QM,
	GOYA_ASYNC_EVENT_ID_TPC4_QM,
	GOYA_ASYNC_EVENT_ID_TPC5_QM,
	GOYA_ASYNC_EVENT_ID_TPC6_QM,
	GOYA_ASYNC_EVENT_ID_TPC7_QM,
	GOYA_ASYNC_EVENT_ID_MME_QM,
	GOYA_ASYNC_EVENT_ID_MME_CMDQ,
	GOYA_ASYNC_EVENT_ID_DMA0_QM,
	GOYA_ASYNC_EVENT_ID_DMA1_QM,
	GOYA_ASYNC_EVENT_ID_DMA2_QM,
	GOYA_ASYNC_EVENT_ID_DMA3_QM,
	GOYA_ASYNC_EVENT_ID_DMA4_QM,
	GOYA_ASYNC_EVENT_ID_DMA0_CH,
	GOYA_ASYNC_EVENT_ID_DMA1_CH,
	GOYA_ASYNC_EVENT_ID_DMA2_CH,
	GOYA_ASYNC_EVENT_ID_DMA3_CH,
	GOYA_ASYNC_EVENT_ID_DMA4_CH,
	GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH0,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH1,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH2,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH3,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH4,
	GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S,
	GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E,
	GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S,
	GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E
};

static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev);
static void goya_mmu_prepare(struct hl_device *hdev, u32 asid);

int goya_get_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int i;

	prop->max_queues = GOYA_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues,
			sizeof(struct hw_queue_properties),
			GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
		prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
		prop->hw_queues_props[i].driver_only = 0;
		prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_KERNEL;
	}

	for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES ; i++) {
		prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
		prop->hw_queues_props[i].driver_only = 1;
		prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_KERNEL;
	}

	for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES +
			NUMBER_OF_INT_HW_QUEUES; i++) {
		prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
		prop->hw_queues_props[i].driver_only = 0;
		prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_USER;
	}

	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;

	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = DRAM_PHYS_DEFAULT_SIZE;
	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
	prop->sram_user_base_address = prop->sram_base_address +
						SRAM_USER_BASE_OFFSET;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	prop->mmu_dram_default_page_addr = MMU_DRAM_DEFAULT_PAGE_ADDR;
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->mmu_hop_table_size = HOP_TABLE_SIZE;
	prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;
	prop->dram_supports_virtual_memory = true;

	prop->dmmu.hop0_shift = HOP0_SHIFT;
	prop->dmmu.hop1_shift = HOP1_SHIFT;
	prop->dmmu.hop2_shift = HOP2_SHIFT;
	prop->dmmu.hop3_shift = HOP3_SHIFT;
	prop->dmmu.hop4_shift = HOP4_SHIFT;
	prop->dmmu.hop0_mask = HOP0_MASK;
	prop->dmmu.hop1_mask = HOP1_MASK;
	prop->dmmu.hop2_mask = HOP2_MASK;
	prop->dmmu.hop3_mask = HOP3_MASK;
	prop->dmmu.hop4_mask = HOP4_MASK;
	prop->dmmu.start_addr = VA_DDR_SPACE_START;
	prop->dmmu.end_addr = VA_DDR_SPACE_END;
	prop->dmmu.page_size = PAGE_SIZE_2MB;
	prop->dmmu.num_hops = MMU_ARCH_5_HOPS;

	/* shifts and masks are the same in PMMU and DMMU */
	memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
	prop->pmmu.start_addr = VA_HOST_SPACE_START;
	prop->pmmu.end_addr = VA_HOST_SPACE_END;
	prop->pmmu.page_size = PAGE_SIZE_4KB;
	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;

	/* PMMU and HPMMU are the same except for the page size */
	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

	prop->dram_size_for_default_page_mapping = VA_DDR_SPACE_END;
	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE;
	prop->high_pll = PLL_HIGH_DEFAULT;
	prop->cb_pool_cb_cnt = GOYA_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GOYA_CB_POOL_CB_SIZE;
	prop->max_power_default = MAX_POWER_DEFAULT;
	prop->dc_power_default = DC_POWER_DEFAULT;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;
	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
		CARD_NAME_MAX_LEN);

	prop->max_pending_cs = GOYA_MAX_PENDING_CS;

	prop->first_available_user_msix_interrupt = USHRT_MAX;

	for (i = 0 ; i < HL_MAX_DCORES ; i++)
		prop->first_available_cq[i] = USHRT_MAX;

	prop->fw_security_status_valid = false;
	prop->hard_reset_done_by_fw = false;

	return 0;
}
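
/*
 * Queue-ID layout established above: IDs [0, NUMBER_OF_EXT_HW_QUEUES) are the
 * external (DMA) queues, followed by the driver-only CPU queues, followed by
 * the internal TPC/MME queues.
 */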

/*
 * goya_pci_bars_map - Map PCI BARs of the Goya device
 *
 * @hdev: pointer to hl_device structure
 *
 * Request PCI regions and map them to kernel virtual addresses.
 * Returns 0 on success
 *
 */
static int goya_pci_bars_map(struct hl_device *hdev)
{
	static const char * const name[] = {"SRAM_CFG", "MSIX", "DDR"};
	bool is_wc[3] = {false, false, true};
	int rc;

	rc = hl_pci_bars_map(hdev, name, is_wc);
	if (rc)
		return rc;

	hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] +
			(CFG_BASE - SRAM_BASE_ADDR);

	return 0;
}

static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
{
	struct goya_device *goya = hdev->asic_specific;
	struct hl_inbound_pci_region pci_region;
	u64 old_addr = addr;
	int rc;

	if ((goya) && (goya->ddr_bar_cur_addr == addr))
		return old_addr;

	/* Inbound Region 1 - Bar 4 - Point to DDR */
	pci_region.mode = PCI_BAR_MATCH_MODE;
	pci_region.bar = DDR_BAR_ID;
	pci_region.addr = addr;
	rc = hl_pci_set_inbound_region(hdev, 1, &pci_region);
	if (rc)
		return U64_MAX;

	if (goya) {
		old_addr = goya->ddr_bar_cur_addr;
		goya->ddr_bar_cur_addr = addr;
	}

	return old_addr;
}
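
/*
 * Usage note: callers stash the returned old address and call
 * goya_set_ddr_bar_base() again with it to restore the DDR BAR when done;
 * U64_MAX signals that moving the BAR failed.
 */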

/*
 * goya_init_iatu - Initialize the iATU unit inside the PCI controller
 *
 * @hdev: pointer to hl_device structure
 *
 * This is needed in case the firmware doesn't initialize the iATU
 *
 */
static int goya_init_iatu(struct hl_device *hdev)
{
	struct hl_inbound_pci_region inbound_region;
	struct hl_outbound_pci_region outbound_region;
	int rc;

	if (hdev->asic_prop.iatu_done_by_fw) {
		hdev->asic_funcs->set_dma_mask_from_fw(hdev);
		return 0;
	}

	/* Inbound Region 0 - Bar 0 - Point to SRAM and CFG */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = SRAM_CFG_BAR_ID;
	inbound_region.addr = SRAM_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 1 - Bar 4 - Point to DDR */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = DDR_BAR_ID;
	inbound_region.addr = DRAM_PHYS_BASE;
	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
	if (rc)
		goto done;

	hdev->asic_funcs->set_dma_mask_from_fw(hdev);

	/* Outbound Region 0 - Point to Host */
	outbound_region.addr = HOST_PHYS_BASE;
	outbound_region.size = HOST_PHYS_SIZE;
	rc = hl_pci_set_outbound_region(hdev, &outbound_region);

done:
	return rc;
}
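
/*
 * A brief note on intent (the mechanics live in hl_pci_set_inbound_region()):
 * in BAR-match mode the iATU maps an entire BAR to the given device address,
 * so the two inbound regions above expose SRAM/CFG and DRAM through their
 * respective BARs, while the outbound region lets the device reach host
 * memory.
 */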

static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmHW_STATE);
}

/*
 * goya_early_init - GOYA early initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Verify PCI bars
 * Set DMA masks
 * PCI controller initialization
 * Map PCI bars
 *
 */
static int goya_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	u32 fw_boot_status, val;
	int rc;

	rc = goya_get_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get fixed properties\n");
		return rc;
	}

	/* Check BAR sizes */
	if (pci_resource_len(pdev, SRAM_CFG_BAR_ID) != CFG_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			SRAM_CFG_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							SRAM_CFG_BAR_ID),
			CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	if (pci_resource_len(pdev, MSIX_BAR_ID) != MSIX_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			MSIX_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
								MSIX_BAR_ID),
			MSIX_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);

	/* If FW security is enabled at this point it means no access to ELBI */
	if (!hdev->asic_prop.fw_security_disabled) {
		hdev->asic_prop.iatu_done_by_fw = true;
		goto pci_init;
	}

	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
				&fw_boot_status);
	if (rc)
		goto free_queue_props;

	/* Check whether FW is configuring iATU */
	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
		hdev->asic_prop.iatu_done_by_fw = true;

pci_init:
	rc = hl_pci_init(hdev);
	if (rc)
		goto free_queue_props;

	/* Before continuing in the initialization, we need to read the preboot
	 * version to determine whether we run with a security-enabled firmware
	 */
	rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
			mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
			GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			hdev->asic_funcs->hw_fini(hdev, true);
		goto pci_fini;
	}

	if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_info(hdev->dev,
			"H/W state is dirty, must reset before initializing\n");
		hdev->asic_funcs->hw_fini(hdev, true);
	}

	if (!hdev->pldm) {
		val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
		if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK)
			dev_warn(hdev->dev,
				"PCI strap is not configured correctly, PCI bus errors may occur\n");
	}

	return 0;

pci_fini:
	hl_pci_fini(hdev);
free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}

/*
 * goya_early_fini - GOYA early finalization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Unmap PCI bars
 *
 */
static int goya_early_fini(struct hl_device *hdev)
{
	kfree(hdev->asic_prop.hw_queues_props);
	hl_pci_fini(hdev);

	return 0;
}

static void goya_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
{
	/* mask to zero the MMBP and ASID bits */
	WREG32_AND(reg, ~0x7FF);
	WREG32_OR(reg, asid);
}
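
/*
 * The ~0x7FF mask above clears the 11 LSBs of these registers: presumably the
 * 10 ASID bits (MAX_ASID is 1024) plus the MMU-bypass (MMBP) bit, so OR-ing in
 * the ASID re-targets the engine at that context with bypass disabled.
 */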

static void goya_qman0_set_security(struct hl_device *hdev, bool secure)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return;

	if (secure)
		WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_FULLY_TRUSTED);
	else
		WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_PARTLY_TRUSTED);

	RREG32(mmDMA_QM_0_GLBL_PROT);
}
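
/* The read-back above flushes the posted register write before returning. */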

/*
 * goya_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 */
static void goya_fetch_psoc_frequency(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
	int rc;

	if (hdev->asic_prop.fw_security_disabled) {
		div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
		div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1);
		nr = RREG32(mmPSOC_PCI_PLL_NR);
		nf = RREG32(mmPSOC_PCI_PLL_NF);
		od = RREG32(mmPSOC_PCI_PLL_OD);

		if (div_sel == DIV_SEL_REF_CLK ||
				div_sel == DIV_SEL_DIVIDED_REF) {
			if (div_sel == DIV_SEL_REF_CLK)
				freq = PLL_REF_CLK;
			else
				freq = PLL_REF_CLK / (div_fctr + 1);
		} else if (div_sel == DIV_SEL_PLL_CLK ||
				div_sel == DIV_SEL_DIVIDED_PLL) {
			pll_clk = PLL_REF_CLK * (nf + 1) /
					((nr + 1) * (od + 1));
			if (div_sel == DIV_SEL_PLL_CLK)
				freq = pll_clk;
			else
				freq = pll_clk / (div_fctr + 1);
		} else {
			dev_warn(hdev->dev,
				"Received invalid div select value: %d\n",
				div_sel);
			freq = 0;
		}
	} else {
		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GOYA_PCI_PLL,
				pll_freq_arr);

		if (rc)
			return;

		freq = pll_freq_arr[1];
	}

	prop->psoc_timestamp_frequency = freq;
	prop->psoc_pci_pll_nr = nr;
	prop->psoc_pci_pll_nf = nf;
	prop->psoc_pci_pll_od = od;
	prop->psoc_pci_pll_div_factor = div_fctr;
}
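
/*
 * Worked example with illustrative numbers: for a 50 MHz PLL reference with
 * nr = 0, nf = 31, od = 1 and DIV_SEL_PLL_CLK, the formula above gives
 * pll_clk = 50 * (31 + 1) / ((0 + 1) * (1 + 1)) = 800 MHz.
 */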

int goya_late_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	goya_fetch_psoc_frequency(hdev);

	rc = goya_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to clear MMU page tables range %d\n", rc);
		return rc;
	}

	rc = goya_mmu_set_dram_default_page(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to set DRAM default page %d\n", rc);
		return rc;
	}

	rc = goya_mmu_add_mappings_for_device_cpu(hdev);
	if (rc)
		return rc;

	rc = goya_init_cpu_queues(hdev);
	if (rc)
		return rc;

	rc = goya_test_cpu_queue(hdev);
	if (rc)
		return rc;

	rc = goya_cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info %d\n", rc);
		return rc;
	}

	/* Now that we have the DRAM size in ASIC prop, we need to check
	 * its size and configure the DMA_IF DDR wrap protection (which is in
	 * the MMU block) accordingly. The value is the log2 of the DRAM size.
	 */
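	/* Example (illustrative): a 4 GB DRAM gives ilog2(4 GB) = 32 */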
	WREG32(mmMMU_LOG2_DDR_SIZE, ilog2(prop->dram_size));

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to enable PCI access from CPU %d\n", rc);
		return rc;
	}

	return 0;
}

/*
 * goya_late_fini - GOYA late tear-down code
 *
 * @hdev: pointer to hl_device structure
 *
 * Free sensors allocated structures
 */
void goya_late_fini(struct hl_device *hdev)
{
	const struct hwmon_channel_info **channel_info_arr;
	int i = 0;

	if (!hdev->hl_chip_info->info)
		return;

	channel_info_arr = hdev->hl_chip_info->info;

	while (channel_info_arr[i]) {
		kfree(channel_info_arr[i]->config);
		kfree(channel_info_arr[i]);
		i++;
	}

	kfree(channel_info_arr);

	hdev->hl_chip_info->info = NULL;
}

/*
 * goya_sw_init - Goya software initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int goya_sw_init(struct hl_device *hdev)
{
	struct goya_device *goya;
	int rc;

	/* Allocate device structure */
	goya = kzalloc(sizeof(*goya), GFP_KERNEL);
	if (!goya)
		return -ENOMEM;

	/* according to goya_init_iatu */
	goya->ddr_bar_cur_addr = DRAM_PHYS_BASE;

	goya->mme_clk = GOYA_PLL_FREQ_LOW;
	goya->tpc_clk = GOYA_PLL_FREQ_LOW;
	goya->ic_clk = GOYA_PLL_FREQ_LOW;

	hdev->asic_specific = goya;

	/* Create DMA pool for small allocations */
	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
			&hdev->pdev->dev, GOYA_DMA_POOL_BLK_SIZE, 8, 0);
	if (!hdev->dma_pool) {
		dev_err(hdev->dev, "failed to create DMA pool\n");
		rc = -ENOMEM;
		goto free_goya_device;
	}

	hdev->cpu_accessible_dma_mem =
			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
					HL_CPU_ACCESSIBLE_MEM_SIZE,
					&hdev->cpu_accessible_dma_address,
					GFP_KERNEL | __GFP_ZERO);

	if (!hdev->cpu_accessible_dma_mem) {
		rc = -ENOMEM;
		goto free_dma_pool;
	}

	dev_dbg(hdev->dev, "cpu accessible memory at bus address %pad\n",
		&hdev->cpu_accessible_dma_address);

	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
	if (!hdev->cpu_accessible_dma_pool) {
		dev_err(hdev->dev,
			"Failed to create CPU accessible DMA pool\n");
		rc = -ENOMEM;
		goto free_cpu_dma_mem;
	}

	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
				(uintptr_t) hdev->cpu_accessible_dma_mem,
				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to CPU accessible DMA pool\n");
		rc = -EFAULT;
		goto free_cpu_accessible_dma_pool;
	}

	spin_lock_init(&goya->hw_queues_lock);
	hdev->supports_coresight = true;
	hdev->supports_soft_reset = true;

	return 0;

free_cpu_accessible_dma_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_goya_device:
	kfree(goya);

	return rc;
}

/*
 * goya_sw_fini - Goya software tear-down code
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int goya_sw_fini(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	gen_pool_destroy(hdev->cpu_accessible_dma_pool);

	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);

	dma_pool_destroy(hdev->dma_pool);

	kfree(goya);

	return 0;
}

static void goya_init_dma_qman(struct hl_device *hdev, int dma_id,
		dma_addr_t bus_address)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;
	u32 reg_off = dma_id * (mmDMA_QM_1_PQ_PI - mmDMA_QM_0_PQ_PI);
	u32 dma_err_cfg = QMAN_DMA_ERR_MSG_EN;

	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	WREG32(mmDMA_QM_0_PQ_BASE_LO + reg_off, lower_32_bits(bus_address));
	WREG32(mmDMA_QM_0_PQ_BASE_HI + reg_off, upper_32_bits(bus_address));

	WREG32(mmDMA_QM_0_PQ_SIZE + reg_off, ilog2(HL_QUEUE_LENGTH));
	WREG32(mmDMA_QM_0_PQ_PI + reg_off, 0);
	WREG32(mmDMA_QM_0_PQ_CI + reg_off, 0);

	WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
	WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
	WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
	WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
	WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
	WREG32(mmDMA_QM_0_GLBL_ERR_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_DMA0_QM + dma_id);

	/* PQ has buffer of 2 cache lines, while CQ has 8 lines */
	WREG32(mmDMA_QM_0_PQ_CFG1 + reg_off, 0x00020002);
	WREG32(mmDMA_QM_0_CQ_CFG1 + reg_off, 0x00080008);

	if (goya->hw_cap_initialized & HW_CAP_MMU)
		WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_PARTLY_TRUSTED);
	else
		WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_FULLY_TRUSTED);

	if (hdev->stop_on_err)
		dma_err_cfg |= 1 << DMA_QM_0_GLBL_ERR_CFG_DMA_STOP_ON_ERR_SHIFT;

	WREG32(mmDMA_QM_0_GLBL_ERR_CFG + reg_off, dma_err_cfg);
	WREG32(mmDMA_QM_0_GLBL_CFG0 + reg_off, QMAN_DMA_ENABLE);
}
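
/*
 * Note: the per-engine register blocks are equally spaced, so deltas such as
 * (mmDMA_QM_1_PQ_PI - mmDMA_QM_0_PQ_PI) above and
 * (mmDMA_CH_1_CFG1 - mmDMA_CH_0_CFG1) below act as a per-dma_id stride into
 * each engine's register block.
 */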

static void goya_init_dma_ch(struct hl_device *hdev, int dma_id)
{
	u32 gic_base_lo, gic_base_hi;
	u64 sob_addr;
	u32 reg_off = dma_id * (mmDMA_CH_1_CFG1 - mmDMA_CH_0_CFG1);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	WREG32(mmDMA_CH_0_ERRMSG_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmDMA_CH_0_ERRMSG_ADDR_HI + reg_off, gic_base_hi);
	WREG32(mmDMA_CH_0_ERRMSG_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_DMA0_CH + dma_id);

	if (dma_id)
		sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
				(dma_id - 1) * 4;
	else
		sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;

	WREG32(mmDMA_CH_0_WR_COMP_ADDR_HI + reg_off, upper_32_bits(sob_addr));
	WREG32(mmDMA_CH_0_WR_COMP_WDATA + reg_off, 0x80000001);
}

/*
 * goya_init_dma_qmans - Initialize QMAN DMA registers
 *
 * @hdev: pointer to hl_device structure
 *
 * Initialize the H/W registers of the QMAN DMA channels
 *
 */
void goya_init_dma_qmans(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	struct hl_hw_queue *q;
	int i;

	if (goya->hw_cap_initialized & HW_CAP_DMA)
		return;

	q = &hdev->kernel_queues[0];

	for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++, q++) {
		q->cq_id = q->msi_vec = i;
		goya_init_dma_qman(hdev, i, q->bus_address);
		goya_init_dma_ch(hdev, i);
	}

	goya->hw_cap_initialized |= HW_CAP_DMA;
}

/*
 * goya_disable_external_queues - Disable external queues
 *
 * @hdev: pointer to hl_device structure
 *
 */
static void goya_disable_external_queues(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_DMA))
		return;

	WREG32(mmDMA_QM_0_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_1_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_2_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_3_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_4_GLBL_CFG0, 0);
}

static int goya_stop_queue(struct hl_device *hdev, u32 cfg_reg,
				u32 cp_sts_reg, u32 glbl_sts0_reg)
{
	int rc;
	u32 status;

	/* use the values of TPC0 as they are all the same */

	WREG32(cfg_reg, 1 << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);

	status = RREG32(cp_sts_reg);
	if (status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK) {
		rc = hl_poll_timeout(
			hdev,
			cp_sts_reg,
			status,
			!(status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK),
			1000,
			QMAN_FENCE_TIMEOUT_USEC);

		/* if the QMAN is stuck in a fence, no need to check for stop */
		if (rc)
			return 0;
	}

	rc = hl_poll_timeout(
		hdev,
		glbl_sts0_reg,
		status,
		(status & TPC0_QM_GLBL_STS0_CP_IS_STOP_MASK),
		1000,
		QMAN_STOP_TIMEOUT_USEC);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for QMAN to stop\n");
		return -EINVAL;
	}

	return 0;
}
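
/*
 * Stopping a queue is two-phased: first break a potentially stuck fence wait
 * by setting CP_STOP, then poll until the CP reports it has actually stopped.
 * A QMAN stuck on a fence is deliberately treated as stopped.
 */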

/*
 * goya_stop_external_queues - Stop external queues
 *
 * @hdev: pointer to hl_device structure
 *
 * Returns 0 on success
 *
 */
static int goya_stop_external_queues(struct hl_device *hdev)
{
	int rc, retval = 0;
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_DMA))
		return retval;

	rc = goya_stop_queue(hdev,
			mmDMA_QM_0_GLBL_CFG1,
			mmDMA_QM_0_CP_STS,
			mmDMA_QM_0_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 0\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_1_GLBL_CFG1,
			mmDMA_QM_1_CP_STS,
			mmDMA_QM_1_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 1\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_2_GLBL_CFG1,
			mmDMA_QM_2_CP_STS,
			mmDMA_QM_2_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 2\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_3_GLBL_CFG1,
			mmDMA_QM_3_CP_STS,
			mmDMA_QM_3_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 3\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_4_GLBL_CFG1,
			mmDMA_QM_4_CP_STS,
			mmDMA_QM_4_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 4\n");
		retval = -EIO;
	}

	return retval;
}

/*
 * goya_init_cpu_queues - Initialize PQ/CQ/EQ of CPU
 *
 * @hdev: pointer to hl_device structure
 *
 * Returns 0 on success
 *
 */
int goya_init_cpu_queues(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_eq *eq;
	u32 status;
	struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ];
	int err;

	if (!hdev->cpu_queues_enable)
		return 0;

	if (goya->hw_cap_initialized & HW_CAP_CPU_Q)
		return 0;

	eq = &hdev->event_queue;

	WREG32(mmCPU_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
	WREG32(mmCPU_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));

	WREG32(mmCPU_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
	WREG32(mmCPU_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));

	WREG32(mmCPU_CQ_BASE_ADDR_LOW,
			lower_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));
	WREG32(mmCPU_CQ_BASE_ADDR_HIGH,
			upper_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));

	WREG32(mmCPU_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
	WREG32(mmCPU_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
	WREG32(mmCPU_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);

	/* Used for EQ CI */
	WREG32(mmCPU_EQ_CI, 0);

	WREG32(mmCPU_IF_PF_PQ_PI, 0);

	WREG32(mmCPU_PQ_INIT_STATUS, PQ_INIT_STATUS_READY_FOR_CP);

	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_PI_UPDATE);

	err = hl_poll_timeout(
		hdev,
		mmCPU_PQ_INIT_STATUS,
		status,
		(status == PQ_INIT_STATUS_READY_FOR_HOST),
		1000,
		GOYA_CPU_TIMEOUT_USEC);

	if (err) {
		dev_err(hdev->dev,
			"Failed to setup communication with device CPU\n");
		return -EIO;
	}

	/* update FW application security bits */
	if (prop->fw_security_status_valid)
		prop->fw_app_security_map = RREG32(mmCPU_BOOT_DEV_STS0);

	goya->hw_cap_initialized |= HW_CAP_CPU_Q;
	return 0;
}
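
/*
 * The handshake above: the driver publishes the PQ/EQ/CQ addresses, marks the
 * PQ as PQ_INIT_STATUS_READY_FOR_CP, kicks the embedded CPU through the GIC,
 * and then waits for the CPU to flip the status to
 * PQ_INIT_STATUS_READY_FOR_HOST.
 */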
1272
1273static void goya_set_pll_refclk(struct hl_device *hdev)
1274{
1275        WREG32(mmCPU_PLL_DIV_SEL_0, 0x0);
1276        WREG32(mmCPU_PLL_DIV_SEL_1, 0x0);
1277        WREG32(mmCPU_PLL_DIV_SEL_2, 0x0);
1278        WREG32(mmCPU_PLL_DIV_SEL_3, 0x0);
1279
1280        WREG32(mmIC_PLL_DIV_SEL_0, 0x0);
1281        WREG32(mmIC_PLL_DIV_SEL_1, 0x0);
1282        WREG32(mmIC_PLL_DIV_SEL_2, 0x0);
1283        WREG32(mmIC_PLL_DIV_SEL_3, 0x0);
1284
1285        WREG32(mmMC_PLL_DIV_SEL_0, 0x0);
1286        WREG32(mmMC_PLL_DIV_SEL_1, 0x0);
1287        WREG32(mmMC_PLL_DIV_SEL_2, 0x0);
1288        WREG32(mmMC_PLL_DIV_SEL_3, 0x0);
1289
1290        WREG32(mmPSOC_MME_PLL_DIV_SEL_0, 0x0);
1291        WREG32(mmPSOC_MME_PLL_DIV_SEL_1, 0x0);
1292        WREG32(mmPSOC_MME_PLL_DIV_SEL_2, 0x0);
1293        WREG32(mmPSOC_MME_PLL_DIV_SEL_3, 0x0);
1294
1295        WREG32(mmPSOC_PCI_PLL_DIV_SEL_0, 0x0);
1296        WREG32(mmPSOC_PCI_PLL_DIV_SEL_1, 0x0);
1297        WREG32(mmPSOC_PCI_PLL_DIV_SEL_2, 0x0);
1298        WREG32(mmPSOC_PCI_PLL_DIV_SEL_3, 0x0);
1299
1300        WREG32(mmPSOC_EMMC_PLL_DIV_SEL_0, 0x0);
1301        WREG32(mmPSOC_EMMC_PLL_DIV_SEL_1, 0x0);
1302        WREG32(mmPSOC_EMMC_PLL_DIV_SEL_2, 0x0);
1303        WREG32(mmPSOC_EMMC_PLL_DIV_SEL_3, 0x0);
1304
1305        WREG32(mmTPC_PLL_DIV_SEL_0, 0x0);
1306        WREG32(mmTPC_PLL_DIV_SEL_1, 0x0);
1307        WREG32(mmTPC_PLL_DIV_SEL_2, 0x0);
1308        WREG32(mmTPC_PLL_DIV_SEL_3, 0x0);
1309}
1310
1311static void goya_disable_clk_rlx(struct hl_device *hdev)
1312{
1313        WREG32(mmPSOC_MME_PLL_CLK_RLX_0, 0x100010);
1314        WREG32(mmIC_PLL_CLK_RLX_0, 0x100010);
1315}
1316
1317static void _goya_tpc_mbist_workaround(struct hl_device *hdev, u8 tpc_id)
1318{
1319        u64 tpc_eml_address;
1320        u32 val, tpc_offset, tpc_eml_offset, tpc_slm_offset;
1321        int err, slm_index;
1322
1323        tpc_offset = tpc_id * 0x40000;
1324        tpc_eml_offset = tpc_id * 0x200000;
1325        tpc_eml_address = (mmTPC0_EML_CFG_BASE + tpc_eml_offset - CFG_BASE);
1326        tpc_slm_offset = tpc_eml_address + 0x100000;
1327
1328        /*
1329         * Workaround for Bug H2 #2443 :
1330         * "TPC SB is not initialized on chip reset"
1331         */
1332
1333        val = RREG32(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset);
1334        if (val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_ACTIVE_MASK)
1335                dev_warn(hdev->dev, "TPC%d MBIST ACTIVE is not cleared\n",
1336                        tpc_id);
1337
1338        WREG32(mmTPC0_CFG_FUNC_MBIST_PAT + tpc_offset, val & 0xFFFFF000);
1339
1340        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_0 + tpc_offset, 0x37FF);
1341        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_1 + tpc_offset, 0x303F);
1342        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_2 + tpc_offset, 0x71FF);
1343        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_3 + tpc_offset, 0x71FF);
1344        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_4 + tpc_offset, 0x70FF);
1345        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_5 + tpc_offset, 0x70FF);
1346        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_6 + tpc_offset, 0x70FF);
1347        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_7 + tpc_offset, 0x70FF);
1348        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_8 + tpc_offset, 0x70FF);
1349        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_9 + tpc_offset, 0x70FF);
1350
1351        WREG32_OR(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
1352                1 << TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_START_SHIFT);
1353
1354        err = hl_poll_timeout(
1355                hdev,
1356                mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
1357                val,
1358                (val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_DONE_MASK),
1359                1000,
1360                HL_DEVICE_TIMEOUT_USEC);
1361
1362        if (err)
1363                dev_err(hdev->dev,
1364                        "Timeout while waiting for TPC%d MBIST DONE\n", tpc_id);
1365
1366        WREG32_OR(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
1367                1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT);
1368
1369        msleep(GOYA_RESET_WAIT_MSEC);
1370
1371        WREG32_AND(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
1372                ~(1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT));
1373
1374        msleep(GOYA_RESET_WAIT_MSEC);
1375
1376        for (slm_index = 0 ; slm_index < 256 ; slm_index++)
1377                WREG32(tpc_slm_offset + (slm_index << 2), 0);
1378
1379        val = RREG32(tpc_slm_offset);
1380}
1381
1382static void goya_tpc_mbist_workaround(struct hl_device *hdev)
1383{
1384        struct goya_device *goya = hdev->asic_specific;
1385        int i;
1386
1387        if (hdev->pldm)
1388                return;
1389
1390        if (goya->hw_cap_initialized & HW_CAP_TPC_MBIST)
1391                return;
1392
1393        /* Workaround for H2 #2443 */
1394
1395        for (i = 0 ; i < TPC_MAX_NUM ; i++)
1396                _goya_tpc_mbist_workaround(hdev, i);
1397
1398        goya->hw_cap_initialized |= HW_CAP_TPC_MBIST;
1399}
1400
1401/*
1402 * goya_init_golden_registers - Initialize golden registers
1403 *
1404 * @hdev: pointer to hl_device structure
1405 *
1406 * Initialize the H/W registers of the device
1407 *
1408 */
1409static void goya_init_golden_registers(struct hl_device *hdev)
1410{
1411        struct goya_device *goya = hdev->asic_specific;
1412        u32 polynom[10], tpc_intr_mask, offset;
1413        int i;
1414
1415        if (goya->hw_cap_initialized & HW_CAP_GOLDEN)
1416                return;
1417
1418        polynom[0] = 0x00020080;
1419        polynom[1] = 0x00401000;
1420        polynom[2] = 0x00200800;
1421        polynom[3] = 0x00002000;
1422        polynom[4] = 0x00080200;
1423        polynom[5] = 0x00040100;
1424        polynom[6] = 0x00100400;
1425        polynom[7] = 0x00004000;
1426        polynom[8] = 0x00010000;
1427        polynom[9] = 0x00008000;
1428
1429        /* Mask all arithmetic interrupts from TPC */
1430        tpc_intr_mask = 0x7FFF;
1431
1432        for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x20000) {
1433                WREG32(mmSRAM_Y0_X0_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
1434                WREG32(mmSRAM_Y0_X1_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
1435                WREG32(mmSRAM_Y0_X2_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
1436                WREG32(mmSRAM_Y0_X3_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
1437                WREG32(mmSRAM_Y0_X4_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
1438
1439                WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_L_ARB + offset, 0x204);
1440                WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_L_ARB + offset, 0x204);
1441                WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_L_ARB + offset, 0x204);
1442                WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_L_ARB + offset, 0x204);
1443                WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_L_ARB + offset, 0x204);
1444
1445
1446                WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_E_ARB + offset, 0x206);
1447                WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_E_ARB + offset, 0x206);
1448                WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_E_ARB + offset, 0x206);
1449                WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_E_ARB + offset, 0x207);
1450                WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_E_ARB + offset, 0x207);
1451
1452                WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_W_ARB + offset, 0x207);
1453                WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_W_ARB + offset, 0x207);
1454                WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_W_ARB + offset, 0x206);
1455                WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_W_ARB + offset, 0x206);
1456                WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_W_ARB + offset, 0x206);
1457
1458                WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_E_ARB + offset, 0x101);
1459                WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_E_ARB + offset, 0x102);
1460                WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_E_ARB + offset, 0x103);
1461                WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_E_ARB + offset, 0x104);
1462                WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_E_ARB + offset, 0x105);
1463
1464                WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_W_ARB + offset, 0x105);
1465                WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_W_ARB + offset, 0x104);
1466                WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_W_ARB + offset, 0x103);
1467                WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_W_ARB + offset, 0x102);
1468                WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_W_ARB + offset, 0x101);
1469        }
1470
1471        WREG32(mmMME_STORE_MAX_CREDIT, 0x21);
1472        WREG32(mmMME_AGU, 0x0f0f0f10);
1473        WREG32(mmMME_SEI_MASK, ~0x0);
1474
1475        WREG32(mmMME6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
1476        WREG32(mmMME5_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
1477        WREG32(mmMME4_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
1478        WREG32(mmMME3_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
1479        WREG32(mmMME2_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
1480        WREG32(mmMME1_RTR_HBW_RD_RQ_N_ARB, 0x07010701);
1481        WREG32(mmMME6_RTR_HBW_RD_RQ_S_ARB, 0x04010401);
1482        WREG32(mmMME5_RTR_HBW_RD_RQ_S_ARB, 0x04050401);
1483        WREG32(mmMME4_RTR_HBW_RD_RQ_S_ARB, 0x03070301);
1484        WREG32(mmMME3_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
1485        WREG32(mmMME2_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
1486        WREG32(mmMME1_RTR_HBW_RD_RQ_S_ARB, 0x01050105);
1487        WREG32(mmMME6_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
1488        WREG32(mmMME5_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
1489        WREG32(mmMME4_RTR_HBW_RD_RQ_W_ARB, 0x01040301);
1490        WREG32(mmMME3_RTR_HBW_RD_RQ_W_ARB, 0x01030401);
1491        WREG32(mmMME2_RTR_HBW_RD_RQ_W_ARB, 0x01040101);
1492        WREG32(mmMME1_RTR_HBW_RD_RQ_W_ARB, 0x01050101);
1493        WREG32(mmMME6_RTR_HBW_WR_RQ_N_ARB, 0x02020202);
1494        WREG32(mmMME5_RTR_HBW_WR_RQ_N_ARB, 0x01070101);
1495        WREG32(mmMME4_RTR_HBW_WR_RQ_N_ARB, 0x02020201);
1496        WREG32(mmMME3_RTR_HBW_WR_RQ_N_ARB, 0x07020701);
1497        WREG32(mmMME2_RTR_HBW_WR_RQ_N_ARB, 0x01020101);
        WREG32(mmMME1_RTR_HBW_WR_RQ_N_ARB, 0x01010101);
1499        WREG32(mmMME6_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
1500        WREG32(mmMME5_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
1501        WREG32(mmMME4_RTR_HBW_WR_RQ_S_ARB, 0x07020701);
1502        WREG32(mmMME3_RTR_HBW_WR_RQ_S_ARB, 0x02020201);
1503        WREG32(mmMME2_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
1504        WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01020102);
1505        WREG32(mmMME6_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
1506        WREG32(mmMME5_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
1507        WREG32(mmMME4_RTR_HBW_WR_RQ_W_ARB, 0x07020707);
1508        WREG32(mmMME3_RTR_HBW_WR_RQ_W_ARB, 0x01020201);
1509        WREG32(mmMME2_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
1510        WREG32(mmMME1_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
1511        WREG32(mmMME6_RTR_HBW_RD_RS_N_ARB, 0x01070102);
1512        WREG32(mmMME5_RTR_HBW_RD_RS_N_ARB, 0x01070102);
1513        WREG32(mmMME4_RTR_HBW_RD_RS_N_ARB, 0x01060102);
1514        WREG32(mmMME3_RTR_HBW_RD_RS_N_ARB, 0x01040102);
1515        WREG32(mmMME2_RTR_HBW_RD_RS_N_ARB, 0x01020102);
1516        WREG32(mmMME1_RTR_HBW_RD_RS_N_ARB, 0x01020107);
1517        WREG32(mmMME6_RTR_HBW_RD_RS_S_ARB, 0x01020106);
1518        WREG32(mmMME5_RTR_HBW_RD_RS_S_ARB, 0x01020102);
1519        WREG32(mmMME4_RTR_HBW_RD_RS_S_ARB, 0x01040102);
1520        WREG32(mmMME3_RTR_HBW_RD_RS_S_ARB, 0x01060102);
1521        WREG32(mmMME2_RTR_HBW_RD_RS_S_ARB, 0x01070102);
1522        WREG32(mmMME1_RTR_HBW_RD_RS_S_ARB, 0x01070102);
1523        WREG32(mmMME6_RTR_HBW_RD_RS_E_ARB, 0x01020702);
1524        WREG32(mmMME5_RTR_HBW_RD_RS_E_ARB, 0x01020702);
1525        WREG32(mmMME4_RTR_HBW_RD_RS_E_ARB, 0x01040602);
1526        WREG32(mmMME3_RTR_HBW_RD_RS_E_ARB, 0x01060402);
1527        WREG32(mmMME2_RTR_HBW_RD_RS_E_ARB, 0x01070202);
1528        WREG32(mmMME1_RTR_HBW_RD_RS_E_ARB, 0x01070102);
1529        WREG32(mmMME6_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1530        WREG32(mmMME5_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1531        WREG32(mmMME4_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1532        WREG32(mmMME3_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1533        WREG32(mmMME2_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1534        WREG32(mmMME1_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1535        WREG32(mmMME6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
1536        WREG32(mmMME5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
1537        WREG32(mmMME4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
1538        WREG32(mmMME3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
1539        WREG32(mmMME2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
1540        WREG32(mmMME1_RTR_HBW_WR_RS_N_ARB, 0x01010107);
1541        WREG32(mmMME6_RTR_HBW_WR_RS_S_ARB, 0x01010107);
1542        WREG32(mmMME5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
1543        WREG32(mmMME4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
1544        WREG32(mmMME3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
1545        WREG32(mmMME2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
1546        WREG32(mmMME1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
1547        WREG32(mmMME6_RTR_HBW_WR_RS_E_ARB, 0x01010501);
1548        WREG32(mmMME5_RTR_HBW_WR_RS_E_ARB, 0x01010501);
1549        WREG32(mmMME4_RTR_HBW_WR_RS_E_ARB, 0x01040301);
1550        WREG32(mmMME3_RTR_HBW_WR_RS_E_ARB, 0x01030401);
1551        WREG32(mmMME2_RTR_HBW_WR_RS_E_ARB, 0x01040101);
1552        WREG32(mmMME1_RTR_HBW_WR_RS_E_ARB, 0x01050101);
1553        WREG32(mmMME6_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1554        WREG32(mmMME5_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1555        WREG32(mmMME4_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1556        WREG32(mmMME3_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1557        WREG32(mmMME2_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1558        WREG32(mmMME1_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1559
1560        WREG32(mmTPC1_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
1561        WREG32(mmTPC1_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
1562        WREG32(mmTPC1_RTR_HBW_RD_RQ_E_ARB, 0x01060101);
1563        WREG32(mmTPC1_RTR_HBW_WR_RQ_N_ARB, 0x02020102);
1564        WREG32(mmTPC1_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
1565        WREG32(mmTPC1_RTR_HBW_WR_RQ_E_ARB, 0x02070202);
1566        WREG32(mmTPC1_RTR_HBW_RD_RS_N_ARB, 0x01020201);
1567        WREG32(mmTPC1_RTR_HBW_RD_RS_S_ARB, 0x01070201);
1568        WREG32(mmTPC1_RTR_HBW_RD_RS_W_ARB, 0x01070202);
1569        WREG32(mmTPC1_RTR_HBW_WR_RS_N_ARB, 0x01010101);
1570        WREG32(mmTPC1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
1571        WREG32(mmTPC1_RTR_HBW_WR_RS_W_ARB, 0x01050101);
1572
1573        WREG32(mmTPC2_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
1574        WREG32(mmTPC2_RTR_HBW_RD_RQ_S_ARB, 0x01050101);
1575        WREG32(mmTPC2_RTR_HBW_RD_RQ_E_ARB, 0x01010201);
1576        WREG32(mmTPC2_RTR_HBW_WR_RQ_N_ARB, 0x02040102);
1577        WREG32(mmTPC2_RTR_HBW_WR_RQ_S_ARB, 0x01050101);
1578        WREG32(mmTPC2_RTR_HBW_WR_RQ_E_ARB, 0x02060202);
1579        WREG32(mmTPC2_RTR_HBW_RD_RS_N_ARB, 0x01020201);
1580        WREG32(mmTPC2_RTR_HBW_RD_RS_S_ARB, 0x01070201);
1581        WREG32(mmTPC2_RTR_HBW_RD_RS_W_ARB, 0x01070202);
1582        WREG32(mmTPC2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
1583        WREG32(mmTPC2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
1584        WREG32(mmTPC2_RTR_HBW_WR_RS_W_ARB, 0x01040101);
1585
1586        WREG32(mmTPC3_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
1587        WREG32(mmTPC3_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
1588        WREG32(mmTPC3_RTR_HBW_RD_RQ_E_ARB, 0x01040301);
1589        WREG32(mmTPC3_RTR_HBW_WR_RQ_N_ARB, 0x02060102);
1590        WREG32(mmTPC3_RTR_HBW_WR_RQ_S_ARB, 0x01040101);
1591        WREG32(mmTPC3_RTR_HBW_WR_RQ_E_ARB, 0x01040301);
1592        WREG32(mmTPC3_RTR_HBW_RD_RS_N_ARB, 0x01040201);
1593        WREG32(mmTPC3_RTR_HBW_RD_RS_S_ARB, 0x01060201);
1594        WREG32(mmTPC3_RTR_HBW_RD_RS_W_ARB, 0x01060402);
1595        WREG32(mmTPC3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
1596        WREG32(mmTPC3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
1597        WREG32(mmTPC3_RTR_HBW_WR_RS_W_ARB, 0x01030401);
1598
1599        WREG32(mmTPC4_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
1600        WREG32(mmTPC4_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
1601        WREG32(mmTPC4_RTR_HBW_RD_RQ_E_ARB, 0x01030401);
1602        WREG32(mmTPC4_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
1603        WREG32(mmTPC4_RTR_HBW_WR_RQ_S_ARB, 0x01030101);
1604        WREG32(mmTPC4_RTR_HBW_WR_RQ_E_ARB, 0x02060702);
1605        WREG32(mmTPC4_RTR_HBW_RD_RS_N_ARB, 0x01060201);
1606        WREG32(mmTPC4_RTR_HBW_RD_RS_S_ARB, 0x01040201);
1607        WREG32(mmTPC4_RTR_HBW_RD_RS_W_ARB, 0x01040602);
1608        WREG32(mmTPC4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
1609        WREG32(mmTPC4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
1610        WREG32(mmTPC4_RTR_HBW_WR_RS_W_ARB, 0x01040301);
1611
1612        WREG32(mmTPC5_RTR_HBW_RD_RQ_N_ARB, 0x01050101);
1613        WREG32(mmTPC5_RTR_HBW_RD_RQ_S_ARB, 0x01020101);
1614        WREG32(mmTPC5_RTR_HBW_RD_RQ_E_ARB, 0x01200501);
1615        WREG32(mmTPC5_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
1616        WREG32(mmTPC5_RTR_HBW_WR_RQ_S_ARB, 0x01020101);
1617        WREG32(mmTPC5_RTR_HBW_WR_RQ_E_ARB, 0x02020602);
1618        WREG32(mmTPC5_RTR_HBW_RD_RS_N_ARB, 0x01070201);
1619        WREG32(mmTPC5_RTR_HBW_RD_RS_S_ARB, 0x01020201);
1620        WREG32(mmTPC5_RTR_HBW_RD_RS_W_ARB, 0x01020702);
1621        WREG32(mmTPC5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
1622        WREG32(mmTPC5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
1623        WREG32(mmTPC5_RTR_HBW_WR_RS_W_ARB, 0x01010501);
1624
1625        WREG32(mmTPC6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
1626        WREG32(mmTPC6_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
1627        WREG32(mmTPC6_RTR_HBW_RD_RQ_E_ARB, 0x01010601);
1628        WREG32(mmTPC6_RTR_HBW_WR_RQ_N_ARB, 0x01010101);
1629        WREG32(mmTPC6_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
1630        WREG32(mmTPC6_RTR_HBW_WR_RQ_E_ARB, 0x02020702);
1631        WREG32(mmTPC6_RTR_HBW_RD_RS_N_ARB, 0x01010101);
1632        WREG32(mmTPC6_RTR_HBW_RD_RS_S_ARB, 0x01010101);
1633        WREG32(mmTPC6_RTR_HBW_RD_RS_W_ARB, 0x01020702);
1634        WREG32(mmTPC6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
1635        WREG32(mmTPC6_RTR_HBW_WR_RS_S_ARB, 0x01010101);
1636        WREG32(mmTPC6_RTR_HBW_WR_RS_W_ARB, 0x01010501);
1637
1638        for (i = 0, offset = 0 ; i < 10 ; i++, offset += 4) {
1639                WREG32(mmMME1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1640                WREG32(mmMME2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1641                WREG32(mmMME3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1642                WREG32(mmMME4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1643                WREG32(mmMME5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1644                WREG32(mmMME6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1645
1646                WREG32(mmTPC0_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1647                WREG32(mmTPC1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1648                WREG32(mmTPC2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1649                WREG32(mmTPC3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1650                WREG32(mmTPC4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1651                WREG32(mmTPC5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1652                WREG32(mmTPC6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1653                WREG32(mmTPC7_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1654
1655                WREG32(mmPCI_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1656                WREG32(mmDMA_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1657        }
1658
1659        for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x40000) {
1660                WREG32(mmMME1_RTR_SCRAMB_EN + offset,
1661                                1 << MME1_RTR_SCRAMB_EN_VAL_SHIFT);
1662                WREG32(mmMME1_RTR_NON_LIN_SCRAMB + offset,
1663                                1 << MME1_RTR_NON_LIN_SCRAMB_EN_SHIFT);
1664        }
1665
1666        for (i = 0, offset = 0 ; i < 8 ; i++, offset += 0x40000) {
1667                /*
1668                 * Workaround for Bug H2 #2441 :
1669                 * "ST.NOP set trace event illegal opcode"
1670                 */
1671                WREG32(mmTPC0_CFG_TPC_INTR_MASK + offset, tpc_intr_mask);
1672
1673                WREG32(mmTPC0_NRTR_SCRAMB_EN + offset,
1674                                1 << TPC0_NRTR_SCRAMB_EN_VAL_SHIFT);
1675                WREG32(mmTPC0_NRTR_NON_LIN_SCRAMB + offset,
1676                                1 << TPC0_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1677
1678                WREG32_FIELD(TPC0_CFG_MSS_CONFIG, offset,
1679                                ICACHE_FETCH_LINE_NUM, 2);
1680        }
1681
1682        WREG32(mmDMA_NRTR_SCRAMB_EN, 1 << DMA_NRTR_SCRAMB_EN_VAL_SHIFT);
1683        WREG32(mmDMA_NRTR_NON_LIN_SCRAMB,
1684                        1 << DMA_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1685
1686        WREG32(mmPCI_NRTR_SCRAMB_EN, 1 << PCI_NRTR_SCRAMB_EN_VAL_SHIFT);
1687        WREG32(mmPCI_NRTR_NON_LIN_SCRAMB,
1688                        1 << PCI_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1689
1690        /*
1691         * Workaround for H2 #HW-23 bug
1692         * Set DMA max outstanding read requests to 240 on DMA CH 1.
1693         * This limitation is still large enough to not affect Gen4 bandwidth.
         * We need to limit only that DMA channel because the user can read
         * from the host only by using DMA CH 1
1696         */
1697        WREG32(mmDMA_CH_1_CFG0, 0x0fff00F0);
1698
1699        WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);
1700
1701        goya->hw_cap_initialized |= HW_CAP_GOLDEN;
1702}
1703
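/*
 * goya_init_mme_qman - Initialize the MME QMAN
 *
 * @hdev: pointer to hl_device structure
 *
 * Set the PQ of the MME QMAN to its pre-defined location in SRAM, configure
 * its completion queue and error reporting towards the GIC and then enable it
 *
 */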
1704static void goya_init_mme_qman(struct hl_device *hdev)
1705{
1706        u32 mtr_base_lo, mtr_base_hi;
1707        u32 so_base_lo, so_base_hi;
1708        u32 gic_base_lo, gic_base_hi;
1709        u64 qman_base_addr;
1710
1711        mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1712        mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1713        so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1714        so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1715
1716        gic_base_lo =
1717                lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1718        gic_base_hi =
1719                upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1720
1721        qman_base_addr = hdev->asic_prop.sram_base_address +
1722                                MME_QMAN_BASE_OFFSET;
1723
1724        WREG32(mmMME_QM_PQ_BASE_LO, lower_32_bits(qman_base_addr));
1725        WREG32(mmMME_QM_PQ_BASE_HI, upper_32_bits(qman_base_addr));
1726        WREG32(mmMME_QM_PQ_SIZE, ilog2(MME_QMAN_LENGTH));
1727        WREG32(mmMME_QM_PQ_PI, 0);
1728        WREG32(mmMME_QM_PQ_CI, 0);
1729        WREG32(mmMME_QM_CP_LDMA_SRC_BASE_LO_OFFSET, 0x10C0);
1730        WREG32(mmMME_QM_CP_LDMA_SRC_BASE_HI_OFFSET, 0x10C4);
1731        WREG32(mmMME_QM_CP_LDMA_TSIZE_OFFSET, 0x10C8);
1732        WREG32(mmMME_QM_CP_LDMA_COMMIT_OFFSET, 0x10CC);
1733
1734        WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
1735        WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
1736        WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_LO, so_base_lo);
1737        WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_HI, so_base_hi);
1738
1739        /* QMAN CQ has 8 cache lines */
1740        WREG32(mmMME_QM_CQ_CFG1, 0x00080008);
1741
1742        WREG32(mmMME_QM_GLBL_ERR_ADDR_LO, gic_base_lo);
1743        WREG32(mmMME_QM_GLBL_ERR_ADDR_HI, gic_base_hi);
1744
1745        WREG32(mmMME_QM_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_QM);
1746
1747        WREG32(mmMME_QM_GLBL_ERR_CFG, QMAN_MME_ERR_MSG_EN);
1748
1749        WREG32(mmMME_QM_GLBL_PROT, QMAN_MME_ERR_PROT);
1750
1751        WREG32(mmMME_QM_GLBL_CFG0, QMAN_MME_ENABLE);
1752}
1753
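/*
 * goya_init_mme_cmdq - Initialize the MME CMDQ
 *
 * @hdev: pointer to hl_device structure
 *
 */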
1754static void goya_init_mme_cmdq(struct hl_device *hdev)
1755{
1756        u32 mtr_base_lo, mtr_base_hi;
1757        u32 so_base_lo, so_base_hi;
1758        u32 gic_base_lo, gic_base_hi;
1759
1760        mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1761        mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1762        so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1763        so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1764
1765        gic_base_lo =
1766                lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1767        gic_base_hi =
1768                upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1769
1770        WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
1771        WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
1772        WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_LO, so_base_lo);
1773        WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_HI, so_base_hi);
1774
1775        /* CMDQ CQ has 20 cache lines */
1776        WREG32(mmMME_CMDQ_CQ_CFG1, 0x00140014);
1777
1778        WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_LO, gic_base_lo);
1779        WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_HI, gic_base_hi);
1780
1781        WREG32(mmMME_CMDQ_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_CMDQ);
1782
1783        WREG32(mmMME_CMDQ_GLBL_ERR_CFG, CMDQ_MME_ERR_MSG_EN);
1784
1785        WREG32(mmMME_CMDQ_GLBL_PROT, CMDQ_MME_ERR_PROT);
1786
1787        WREG32(mmMME_CMDQ_GLBL_CFG0, CMDQ_MME_ENABLE);
1788}
1789
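/*
 * goya_init_mme_qmans - Initialize the MME QMAN and CMDQ
 *
 * @hdev: pointer to hl_device structure
 *
 */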
1790void goya_init_mme_qmans(struct hl_device *hdev)
1791{
1792        struct goya_device *goya = hdev->asic_specific;
1793        u32 so_base_lo, so_base_hi;
1794
1795        if (goya->hw_cap_initialized & HW_CAP_MME)
1796                return;
1797
1798        so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1799        so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1800
1801        WREG32(mmMME_SM_BASE_ADDRESS_LOW, so_base_lo);
1802        WREG32(mmMME_SM_BASE_ADDRESS_HIGH, so_base_hi);
1803
1804        goya_init_mme_qman(hdev);
1805        goya_init_mme_cmdq(hdev);
1806
1807        goya->hw_cap_initialized |= HW_CAP_MME;
1808}
1809
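/*
 * goya_init_tpc_qman - Initialize a TPC QMAN
 *
 * @hdev: pointer to hl_device structure
 * @base_off: offset of the QMAN PQ, relative to the SRAM base address
 * @tpc_id: index of the TPC this QMAN belongs to
 *
 */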
1810static void goya_init_tpc_qman(struct hl_device *hdev, u32 base_off, int tpc_id)
1811{
1812        u32 mtr_base_lo, mtr_base_hi;
1813        u32 so_base_lo, so_base_hi;
1814        u32 gic_base_lo, gic_base_hi;
1815        u64 qman_base_addr;
1816        u32 reg_off = tpc_id * (mmTPC1_QM_PQ_PI - mmTPC0_QM_PQ_PI);
1817
1818        mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1819        mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1820        so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1821        so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1822
1823        gic_base_lo =
1824                lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1825        gic_base_hi =
1826                upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1827
1828        qman_base_addr = hdev->asic_prop.sram_base_address + base_off;
1829
1830        WREG32(mmTPC0_QM_PQ_BASE_LO + reg_off, lower_32_bits(qman_base_addr));
1831        WREG32(mmTPC0_QM_PQ_BASE_HI + reg_off, upper_32_bits(qman_base_addr));
1832        WREG32(mmTPC0_QM_PQ_SIZE + reg_off, ilog2(TPC_QMAN_LENGTH));
1833        WREG32(mmTPC0_QM_PQ_PI + reg_off, 0);
1834        WREG32(mmTPC0_QM_PQ_CI + reg_off, 0);
1835        WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET + reg_off, 0x10C0);
1836        WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_HI_OFFSET + reg_off, 0x10C4);
1837        WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET + reg_off, 0x10C8);
1838        WREG32(mmTPC0_QM_CP_LDMA_COMMIT_OFFSET + reg_off, 0x10CC);
1839
1840        WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
1841        WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
1842        WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
1843        WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
1844
1845        WREG32(mmTPC0_QM_CQ_CFG1 + reg_off, 0x00080008);
1846
1847        WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
1848        WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
1849
1850        WREG32(mmTPC0_QM_GLBL_ERR_WDATA + reg_off,
1851                        GOYA_ASYNC_EVENT_ID_TPC0_QM + tpc_id);
1852
1853        WREG32(mmTPC0_QM_GLBL_ERR_CFG + reg_off, QMAN_TPC_ERR_MSG_EN);
1854
1855        WREG32(mmTPC0_QM_GLBL_PROT + reg_off, QMAN_TPC_ERR_PROT);
1856
1857        WREG32(mmTPC0_QM_GLBL_CFG0 + reg_off, QMAN_TPC_ENABLE);
1858}
1859
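/*
 * goya_init_tpc_cmdq - Initialize a TPC CMDQ
 *
 * @hdev: pointer to hl_device structure
 * @tpc_id: index of the TPC this CMDQ belongs to
 *
 */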
1860static void goya_init_tpc_cmdq(struct hl_device *hdev, int tpc_id)
1861{
1862        u32 mtr_base_lo, mtr_base_hi;
1863        u32 so_base_lo, so_base_hi;
1864        u32 gic_base_lo, gic_base_hi;
1865        u32 reg_off = tpc_id * (mmTPC1_CMDQ_CQ_CFG1 - mmTPC0_CMDQ_CQ_CFG1);
1866
1867        mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1868        mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1869        so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1870        so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1871
1872        gic_base_lo =
1873                lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1874        gic_base_hi =
1875                upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1876
1877        WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
1878        WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
1879        WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
1880        WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
1881
1882        WREG32(mmTPC0_CMDQ_CQ_CFG1 + reg_off, 0x00140014);
1883
1884        WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
1885        WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
1886
1887        WREG32(mmTPC0_CMDQ_GLBL_ERR_WDATA + reg_off,
1888                        GOYA_ASYNC_EVENT_ID_TPC0_CMDQ + tpc_id);
1889
1890        WREG32(mmTPC0_CMDQ_GLBL_ERR_CFG + reg_off, CMDQ_TPC_ERR_MSG_EN);
1891
1892        WREG32(mmTPC0_CMDQ_GLBL_PROT + reg_off, CMDQ_TPC_ERR_PROT);
1893
1894        WREG32(mmTPC0_CMDQ_GLBL_CFG0 + reg_off, CMDQ_TPC_ENABLE);
1895}
1896
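/*
 * goya_init_tpc_qmans - Initialize all the QMANs and CMDQs of the TPCs
 *
 * @hdev: pointer to hl_device structure
 *
 */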
1897void goya_init_tpc_qmans(struct hl_device *hdev)
1898{
1899        struct goya_device *goya = hdev->asic_specific;
1900        u32 so_base_lo, so_base_hi;
1901        u32 cfg_off = mmTPC1_CFG_SM_BASE_ADDRESS_LOW -
1902                        mmTPC0_CFG_SM_BASE_ADDRESS_LOW;
1903        int i;
1904
1905        if (goya->hw_cap_initialized & HW_CAP_TPC)
1906                return;
1907
1908        so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1909        so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1910
1911        for (i = 0 ; i < TPC_MAX_NUM ; i++) {
1912                WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_LOW + i * cfg_off,
1913                                so_base_lo);
1914                WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + i * cfg_off,
1915                                so_base_hi);
1916        }
1917
1918        goya_init_tpc_qman(hdev, TPC0_QMAN_BASE_OFFSET, 0);
1919        goya_init_tpc_qman(hdev, TPC1_QMAN_BASE_OFFSET, 1);
1920        goya_init_tpc_qman(hdev, TPC2_QMAN_BASE_OFFSET, 2);
1921        goya_init_tpc_qman(hdev, TPC3_QMAN_BASE_OFFSET, 3);
1922        goya_init_tpc_qman(hdev, TPC4_QMAN_BASE_OFFSET, 4);
1923        goya_init_tpc_qman(hdev, TPC5_QMAN_BASE_OFFSET, 5);
1924        goya_init_tpc_qman(hdev, TPC6_QMAN_BASE_OFFSET, 6);
1925        goya_init_tpc_qman(hdev, TPC7_QMAN_BASE_OFFSET, 7);
1926
1927        for (i = 0 ; i < TPC_MAX_NUM ; i++)
1928                goya_init_tpc_cmdq(hdev, i);
1929
1930        goya->hw_cap_initialized |= HW_CAP_TPC;
1931}
1932
1933/*
1934 * goya_disable_internal_queues - Disable internal queues
1935 *
1936 * @hdev: pointer to hl_device structure
1937 *
1938 */
1939static void goya_disable_internal_queues(struct hl_device *hdev)
1940{
        struct goya_device *goya = hdev->asic_specific;
        int i;
1942
1943        if (!(goya->hw_cap_initialized & HW_CAP_MME))
1944                goto disable_tpc;
1945
1946        WREG32(mmMME_QM_GLBL_CFG0, 0);
1947        WREG32(mmMME_CMDQ_GLBL_CFG0, 0);
1948
1949disable_tpc:
1950        if (!(goya->hw_cap_initialized & HW_CAP_TPC))
1951                return;
1952
        for (i = 0 ; i < TPC_MAX_NUM ; i++) {
                u32 qm_off = i * (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
                u32 cmdq_off = i *
                        (mmTPC1_CMDQ_GLBL_CFG0 - mmTPC0_CMDQ_GLBL_CFG0);

                WREG32(mmTPC0_QM_GLBL_CFG0 + qm_off, 0);
                WREG32(mmTPC0_CMDQ_GLBL_CFG0 + cmdq_off, 0);
        }
1976}
1977
1978/*
1979 * goya_stop_internal_queues - Stop internal queues
1980 *
1981 * @hdev: pointer to hl_device structure
1982 *
1983 * Returns 0 on success
1984 *
1985 */
1986static int goya_stop_internal_queues(struct hl_device *hdev)
1987{
1988        struct goya_device *goya = hdev->asic_specific;
        int rc, retval = 0, i;
1990
1991        if (!(goya->hw_cap_initialized & HW_CAP_MME))
1992                goto stop_tpc;
1993
        /*
         * Each queue (QMAN) is a separate H/W logic block, so each QMAN can
         * be stopped independently. Failure to stop one QMAN does NOT prevent
         * us from trying to stop the other QMANs
         */
1999
2000        rc = goya_stop_queue(hdev,
2001                        mmMME_QM_GLBL_CFG1,
2002                        mmMME_QM_CP_STS,
2003                        mmMME_QM_GLBL_STS0);
2004
2005        if (rc) {
2006                dev_err(hdev->dev, "failed to stop MME QMAN\n");
2007                retval = -EIO;
2008        }
2009
2010        rc = goya_stop_queue(hdev,
2011                        mmMME_CMDQ_GLBL_CFG1,
2012                        mmMME_CMDQ_CP_STS,
2013                        mmMME_CMDQ_GLBL_STS0);
2014
2015        if (rc) {
2016                dev_err(hdev->dev, "failed to stop MME CMDQ\n");
2017                retval = -EIO;
2018        }
2019
2020stop_tpc:
2021        if (!(goya->hw_cap_initialized & HW_CAP_TPC))
2022                return retval;
2023
        for (i = 0 ; i < TPC_MAX_NUM ; i++) {
                u32 qm_off = i * (mmTPC1_QM_GLBL_CFG1 - mmTPC0_QM_GLBL_CFG1);
                u32 cmdq_off = i *
                        (mmTPC1_CMDQ_GLBL_CFG1 - mmTPC0_CMDQ_GLBL_CFG1);

                rc = goya_stop_queue(hdev,
                                mmTPC0_QM_GLBL_CFG1 + qm_off,
                                mmTPC0_QM_CP_STS + qm_off,
                                mmTPC0_QM_GLBL_STS0 + qm_off);

                if (rc) {
                        dev_err(hdev->dev, "failed to stop TPC %d QMAN\n", i);
                        retval = -EIO;
                }

                rc = goya_stop_queue(hdev,
                                mmTPC0_CMDQ_GLBL_CFG1 + cmdq_off,
                                mmTPC0_CMDQ_CP_STS + cmdq_off,
                                mmTPC0_CMDQ_GLBL_STS0 + cmdq_off);

                if (rc) {
                        dev_err(hdev->dev, "failed to stop TPC %d CMDQ\n", i);
                        retval = -EIO;
                }
        }
2183
2184        return retval;
2185}
2186
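/*
 * goya_dma_stall - Stall all the DMA channels
 *
 * @hdev: pointer to hl_device structure
 *
 */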
2187static void goya_dma_stall(struct hl_device *hdev)
2188{
2189        struct goya_device *goya = hdev->asic_specific;
2190
2191        if (!(goya->hw_cap_initialized & HW_CAP_DMA))
2192                return;
2193
2194        WREG32(mmDMA_QM_0_GLBL_CFG1, 1 << DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT);
2195        WREG32(mmDMA_QM_1_GLBL_CFG1, 1 << DMA_QM_1_GLBL_CFG1_DMA_STOP_SHIFT);
2196        WREG32(mmDMA_QM_2_GLBL_CFG1, 1 << DMA_QM_2_GLBL_CFG1_DMA_STOP_SHIFT);
2197        WREG32(mmDMA_QM_3_GLBL_CFG1, 1 << DMA_QM_3_GLBL_CFG1_DMA_STOP_SHIFT);
2198        WREG32(mmDMA_QM_4_GLBL_CFG1, 1 << DMA_QM_4_GLBL_CFG1_DMA_STOP_SHIFT);
2199}
2200
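/*
 * goya_tpc_stall - Stall all the TPC engines
 *
 * @hdev: pointer to hl_device structure
 *
 */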
2201static void goya_tpc_stall(struct hl_device *hdev)
2202{
2203        struct goya_device *goya = hdev->asic_specific;
2204
2205        if (!(goya->hw_cap_initialized & HW_CAP_TPC))
2206                return;
2207
2208        WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2209        WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC1_CFG_TPC_STALL_V_SHIFT);
2210        WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC2_CFG_TPC_STALL_V_SHIFT);
2211        WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC3_CFG_TPC_STALL_V_SHIFT);
2212        WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC4_CFG_TPC_STALL_V_SHIFT);
2213        WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC5_CFG_TPC_STALL_V_SHIFT);
2214        WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC6_CFG_TPC_STALL_V_SHIFT);
2215        WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC7_CFG_TPC_STALL_V_SHIFT);
2216}
2217
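/*
 * goya_mme_stall - Stall the MME engine
 *
 * @hdev: pointer to hl_device structure
 *
 */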
2218static void goya_mme_stall(struct hl_device *hdev)
2219{
2220        struct goya_device *goya = hdev->asic_specific;
2221
2222        if (!(goya->hw_cap_initialized & HW_CAP_MME))
2223                return;
2224
2225        WREG32(mmMME_STALL, 0xFFFFFFFF);
2226}
2227
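/*
 * goya_enable_msix - Enable MSI-X and request the IRQs of the completion
 *                    queues and of the event queue
 *
 * @hdev: pointer to hl_device structure
 *
 * Returns 0 on success
 *
 */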
2228static int goya_enable_msix(struct hl_device *hdev)
2229{
2230        struct goya_device *goya = hdev->asic_specific;
2231        int cq_cnt = hdev->asic_prop.completion_queues_count;
2232        int rc, i, irq_cnt_init, irq;
2233
2234        if (goya->hw_cap_initialized & HW_CAP_MSIX)
2235                return 0;
2236
2237        rc = pci_alloc_irq_vectors(hdev->pdev, GOYA_MSIX_ENTRIES,
2238                                GOYA_MSIX_ENTRIES, PCI_IRQ_MSIX);
2239        if (rc < 0) {
2240                dev_err(hdev->dev,
2241                        "MSI-X: Failed to enable support -- %d/%d\n",
2242                        GOYA_MSIX_ENTRIES, rc);
2243                return rc;
2244        }
2245
2246        for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
2247                irq = pci_irq_vector(hdev->pdev, i);
2248                rc = request_irq(irq, hl_irq_handler_cq, 0, goya_irq_name[i],
2249                                &hdev->completion_queue[i]);
2250                if (rc) {
                        dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
2252                        goto free_irqs;
2253                }
2254        }
2255
2256        irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);
2257
2258        rc = request_irq(irq, hl_irq_handler_eq, 0,
2259                        goya_irq_name[GOYA_EVENT_QUEUE_MSIX_IDX],
2260                        &hdev->event_queue);
2261        if (rc) {
                dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
2263                goto free_irqs;
2264        }
2265
2266        goya->hw_cap_initialized |= HW_CAP_MSIX;
2267        return 0;
2268
2269free_irqs:
2270        for (i = 0 ; i < irq_cnt_init ; i++)
2271                free_irq(pci_irq_vector(hdev->pdev, i),
2272                        &hdev->completion_queue[i]);
2273
2274        pci_free_irq_vectors(hdev->pdev);
2275        return rc;
2276}
2277
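/*
 * goya_sync_irqs - Wait for all in-flight IRQ handlers to finish
 *
 * @hdev: pointer to hl_device structure
 *
 */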
2278static void goya_sync_irqs(struct hl_device *hdev)
2279{
2280        struct goya_device *goya = hdev->asic_specific;
2281        int i;
2282
2283        if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
2284                return;
2285
2286        /* Wait for all pending IRQs to be finished */
2287        for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
2288                synchronize_irq(pci_irq_vector(hdev->pdev, i));
2289
2290        synchronize_irq(pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX));
2291}
2292
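/*
 * goya_disable_msix - Free all the IRQs and disable MSI-X
 *
 * @hdev: pointer to hl_device structure
 *
 */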
2293static void goya_disable_msix(struct hl_device *hdev)
2294{
2295        struct goya_device *goya = hdev->asic_specific;
2296        int i, irq;
2297
2298        if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
2299                return;
2300
2301        goya_sync_irqs(hdev);
2302
2303        irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);
2304        free_irq(irq, &hdev->event_queue);
2305
2306        for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
2307                irq = pci_irq_vector(hdev->pdev, i);
2308                free_irq(irq, &hdev->completion_queue[i]);
2309        }
2310
2311        pci_free_irq_vectors(hdev->pdev);
2312
2313        goya->hw_cap_initialized &= ~HW_CAP_MSIX;
2314}
2315
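/*
 * goya_enable_timestamp - Zero and enable the PSOC timestamp counter
 *
 * @hdev: pointer to hl_device structure
 *
 */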
2316static void goya_enable_timestamp(struct hl_device *hdev)
2317{
2318        /* Disable the timestamp counter */
2319        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2320
2321        /* Zero the lower/upper parts of the 64-bit counter */
2322        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
2323        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
2324
2325        /* Enable the counter */
2326        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
2327}
2328
2329static void goya_disable_timestamp(struct hl_device *hdev)
2330{
2331        /* Disable the timestamp counter */
2332        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2333}
2334
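/*
 * goya_halt_engines - Halt the compute engines
 *
 * @hdev: pointer to hl_device structure
 * @hard_reset: true if this is part of a hard reset. In that case, MSI-X is
 *              disabled and the mappings of the device CPU are removed
 *
 * Stop, stall and then disable all the QMANs and compute engines
 *
 */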
2335static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
2336{
2337        u32 wait_timeout_ms;
2338
2339        dev_info(hdev->dev,
2340                "Halting compute engines and disabling interrupts\n");
2341
2342        if (hdev->pldm)
2343                wait_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
2344        else
2345                wait_timeout_ms = GOYA_RESET_WAIT_MSEC;
2346
2347        goya_stop_external_queues(hdev);
2348        goya_stop_internal_queues(hdev);
2349
2350        msleep(wait_timeout_ms);
2351
2352        goya_dma_stall(hdev);
2353        goya_tpc_stall(hdev);
2354        goya_mme_stall(hdev);
2355
2356        msleep(wait_timeout_ms);
2357
2358        goya_disable_external_queues(hdev);
2359        goya_disable_internal_queues(hdev);
2360
2361        goya_disable_timestamp(hdev);
2362
2363        if (hard_reset) {
2364                goya_disable_msix(hdev);
2365                goya_mmu_remove_device_cpu_mappings(hdev);
2366        } else {
2367                goya_sync_irqs(hdev);
2368        }
2369}
2370
2371/*
2372 * goya_load_firmware_to_device() - Load LINUX FW code to device.
2373 * @hdev: Pointer to hl_device structure.
2374 *
 * Copy LINUX fw code from firmware file to DDR BAR.
2376 *
2377 * Return: 0 on success, non-zero for failure.
2378 */
2379static int goya_load_firmware_to_device(struct hl_device *hdev)
2380{
2381        void __iomem *dst;
2382
2383        dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET;
2384
2385        return hl_fw_load_fw_to_device(hdev, GOYA_LINUX_FW_FILE, dst, 0, 0);
2386}
2387
2388/*
2389 * goya_load_boot_fit_to_device() - Load boot fit to device.
2390 * @hdev: Pointer to hl_device structure.
2391 *
2392 * Copy boot fit file to SRAM BAR.
2393 *
2394 * Return: 0 on success, non-zero for failure.
2395 */
2396static int goya_load_boot_fit_to_device(struct hl_device *hdev)
2397{
2398        void __iomem *dst;
2399
2400        dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
2401
2402        return hl_fw_load_fw_to_device(hdev, GOYA_BOOT_FIT_FILE, dst, 0, 0);
2403}
2404
2405/*
2406 * FW component passes an offset from SRAM_BASE_ADDR in SCRATCHPAD_xx.
 * The version string is located at that offset.
2408 */
2409static int goya_read_device_fw_version(struct hl_device *hdev,
2410                                        enum hl_fw_component fwc)
2411{
2412        const char *name;
2413        u32 ver_off;
2414        char *dest;
2415
2416        switch (fwc) {
2417        case FW_COMP_UBOOT:
2418                ver_off = RREG32(mmUBOOT_VER_OFFSET);
2419                dest = hdev->asic_prop.uboot_ver;
2420                name = "U-Boot";
2421                break;
2422        case FW_COMP_PREBOOT:
2423                ver_off = RREG32(mmPREBOOT_VER_OFFSET);
2424                dest = hdev->asic_prop.preboot_ver;
2425                name = "Preboot";
2426                break;
2427        default:
2428                dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
2429                return -EIO;
2430        }
2431
2432        ver_off &= ~((u32)SRAM_BASE_ADDR);
2433
2434        if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
2435                memcpy_fromio(dest, hdev->pcie_bar[SRAM_CFG_BAR_ID] + ver_off,
2436                                                        VERSION_MAX_LEN);
2437        } else {
2438                dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
2439                                                                name, ver_off);
2440                strcpy(dest, "unavailable");
2441
2442                return -EIO;
2443        }
2444
2445        return 0;
2446}
2447
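/*
 * goya_init_cpu - Initialize the embedded CPU of the device
 *
 * @hdev: pointer to hl_device structure
 *
 * Map the DDR BAR to the DRAM base address and then perform the boot flow of
 * the device CPU
 *
 * Returns 0 on success
 *
 */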
2448static int goya_init_cpu(struct hl_device *hdev)
2449{
2450        struct goya_device *goya = hdev->asic_specific;
2451        int rc;
2452
2453        if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
2454                return 0;
2455
2456        if (goya->hw_cap_initialized & HW_CAP_CPU)
2457                return 0;
2458
        /*
         * Before pushing u-boot/linux to the device, we need to set the DDR
         * BAR to the base address of DRAM
         */
2463        if (goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
2464                dev_err(hdev->dev,
2465                        "failed to map DDR bar to DRAM base address\n");
2466                return -EIO;
2467        }
2468
2469        rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
2470                        mmPSOC_GLOBAL_CONF_UBOOT_MAGIC,
2471                        mmCPU_CMD_STATUS_TO_HOST,
2472                        mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
2473                        false, GOYA_CPU_TIMEOUT_USEC,
2474                        GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);
2475
2476        if (rc)
2477                return rc;
2478
2479        goya->hw_cap_initialized |= HW_CAP_CPU;
2480
2481        return 0;
2482}
2483
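/*
 * goya_mmu_update_asid_hop0_addr - Set the hop0 page table address of an ASID
 *
 * @hdev: pointer to hl_device structure
 * @asid: ASID whose hop0 address is updated
 * @phys_addr: physical address of the hop0 page table
 *
 * Returns 0 on success
 *
 */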
2484static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
2485                                                u64 phys_addr)
2486{
2487        u32 status, timeout_usec;
2488        int rc;
2489
2490        if (hdev->pldm)
2491                timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
2492        else
2493                timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
2494
2495        WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
2496        WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
2497        WREG32(MMU_ASID_BUSY, 0x80000000 | asid);
2498
2499        rc = hl_poll_timeout(
2500                hdev,
2501                MMU_ASID_BUSY,
2502                status,
2503                !(status & 0x80000000),
2504                1000,
2505                timeout_usec);
2506
2507        if (rc) {
2508                dev_err(hdev->dev,
2509                        "Timeout during MMU hop0 config of asid %d\n", asid);
2510                return rc;
2511        }
2512
2513        return 0;
2514}
2515
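/*
 * goya_mmu_init - Initialize the device MMU
 *
 * @hdev: pointer to hl_device structure
 *
 * Set the hop0 addresses of all ASIDs, configure the STLB cache and then
 * enable the MMU
 *
 * Returns 0 on success
 *
 */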
2516int goya_mmu_init(struct hl_device *hdev)
2517{
2518        struct asic_fixed_properties *prop = &hdev->asic_prop;
2519        struct goya_device *goya = hdev->asic_specific;
2520        u64 hop0_addr;
2521        int rc, i;
2522
2523        if (!hdev->mmu_enable)
2524                return 0;
2525
2526        if (goya->hw_cap_initialized & HW_CAP_MMU)
2527                return 0;
2528
2529        hdev->dram_default_page_mapping = true;
2530
2531        for (i = 0 ; i < prop->max_asid ; i++) {
2532                hop0_addr = prop->mmu_pgt_addr +
2533                                (i * prop->mmu_hop_table_size);
2534
2535                rc = goya_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
2536                if (rc) {
2537                        dev_err(hdev->dev,
2538                                "failed to set hop0 addr for asid %d\n", i);
                        return rc;
2540                }
2541        }
2542
2543        goya->hw_cap_initialized |= HW_CAP_MMU;
2544
2545        /* init MMU cache manage page */
2546        WREG32(mmSTLB_CACHE_INV_BASE_39_8,
2547                                lower_32_bits(MMU_CACHE_MNG_ADDR >> 8));
2548        WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
2549
2550        /* Remove follower feature due to performance bug */
2551        WREG32_AND(mmSTLB_STLB_FEATURE_EN,
2552                        (~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK));
2553
2554        hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
2555                                        VM_TYPE_USERPTR | VM_TYPE_PHYS_PACK);
2556
2557        WREG32(mmMMU_MMU_ENABLE, 1);
2558        WREG32(mmMMU_SPI_MASK, 0xF);
2559
2560        return 0;
2564}
2565
2566/*
2567 * goya_hw_init - Goya hardware initialization code
2568 *
2569 * @hdev: pointer to hl_device structure
2570 *
2571 * Returns 0 on success
2572 *
2573 */
2574static int goya_hw_init(struct hl_device *hdev)
2575{
2576        struct asic_fixed_properties *prop = &hdev->asic_prop;
2577        int rc;
2578
2579        /* Perform read from the device to make sure device is up */
2580        RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2581
2582        /*
2583         * Let's mark in the H/W that we have reached this point. We check
2584         * this value in the reset_before_init function to understand whether
2585         * we need to reset the chip before doing H/W init. This register is
2586         * cleared by the H/W upon H/W reset
2587         */
2588        WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
2589
2590        rc = goya_init_cpu(hdev);
2591        if (rc) {
2592                dev_err(hdev->dev, "failed to initialize CPU\n");
2593                return rc;
2594        }
2595
2596        goya_tpc_mbist_workaround(hdev);
2597
2598        goya_init_golden_registers(hdev);
2599
2600        /*
2601         * After CPU initialization is finished, change DDR bar mapping inside
2602         * iATU to point to the start address of the MMU page tables
2603         */
2604        if (goya_set_ddr_bar_base(hdev, (MMU_PAGE_TABLES_ADDR &
2605                        ~(prop->dram_pci_bar_size - 0x1ull))) == U64_MAX) {
2606                dev_err(hdev->dev,
2607                        "failed to map DDR bar to MMU page tables\n");
2608                return -EIO;
2609        }
2610
2611        rc = goya_mmu_init(hdev);
2612        if (rc)
2613                return rc;
2614
2615        goya_init_security(hdev);
2616
2617        goya_init_dma_qmans(hdev);
2618
2619        goya_init_mme_qmans(hdev);
2620
2621        goya_init_tpc_qmans(hdev);
2622
2623        goya_enable_timestamp(hdev);
2624
2625        /* MSI-X must be enabled before CPU queues are initialized */
2626        rc = goya_enable_msix(hdev);
2627        if (rc)
2628                goto disable_queues;
2629
2630        /* Perform read from the device to flush all MSI-X configuration */
2631        RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2632
2633        return 0;
2634
2635disable_queues:
2636        goya_disable_internal_queues(hdev);
2637        goya_disable_external_queues(hdev);
2638
2639        return rc;
2640}
2641
2642/*
2643 * goya_hw_fini - Goya hardware tear-down code
2644 *
2645 * @hdev: pointer to hl_device structure
2646 * @hard_reset: should we do hard reset to all engines or just reset the
2647 *              compute/dma engines
2648 */
2649static void goya_hw_fini(struct hl_device *hdev, bool hard_reset)
2650{
2651        struct goya_device *goya = hdev->asic_specific;
2652        u32 reset_timeout_ms, cpu_timeout_ms, status;
2653
2654        if (hdev->pldm) {
2655                reset_timeout_ms = GOYA_PLDM_RESET_TIMEOUT_MSEC;
2656                cpu_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
2657        } else {
2658                reset_timeout_ms = GOYA_RESET_TIMEOUT_MSEC;
2659                cpu_timeout_ms = GOYA_CPU_RESET_WAIT_MSEC;
2660        }
2661
2662        if (hard_reset) {
                /* We don't know what the state of the CPU is, so make sure
                 * it is stopped by any means necessary
                 */
2666                WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_GOTO_WFE);
2667                WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2668                        GOYA_ASYNC_EVENT_ID_HALT_MACHINE);
2669
2670                msleep(cpu_timeout_ms);
2671
2672                goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE);
2673                goya_disable_clk_rlx(hdev);
2674                goya_set_pll_refclk(hdev);
2675
2676                WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, RESET_ALL);
2677                dev_info(hdev->dev,
2678                        "Issued HARD reset command, going to wait %dms\n",
2679                        reset_timeout_ms);
2680        } else {
2681                WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, DMA_MME_TPC_RESET);
2682                dev_info(hdev->dev,
2683                        "Issued SOFT reset command, going to wait %dms\n",
2684                        reset_timeout_ms);
2685        }
2686
        /*
         * After hard reset, we can't poll the BTM_FSM register because the
         * PSOC itself is in reset. In either type of reset, we need to wait
         * until the reset is deasserted
         */
2692        msleep(reset_timeout_ms);
2693
2694        status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
2695        if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
2696                dev_err(hdev->dev,
2697                        "Timeout while waiting for device to reset 0x%x\n",
2698                        status);
2699
2700        if (!hard_reset && goya) {
2701                goya->hw_cap_initialized &= ~(HW_CAP_DMA | HW_CAP_MME |
2702                                                HW_CAP_GOLDEN | HW_CAP_TPC);
2703                WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2704                                GOYA_ASYNC_EVENT_ID_SOFT_RESET);
2705                return;
2706        }
2707
2708        /* Chicken bit to re-initiate boot sequencer flow */
2709        WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START,
2710                1 << PSOC_GLOBAL_CONF_BOOT_SEQ_RE_START_IND_SHIFT);
2711        /* Move boot manager FSM to pre boot sequencer init state */
2712        WREG32(mmPSOC_GLOBAL_CONF_SW_BTM_FSM,
2713                        0xA << PSOC_GLOBAL_CONF_SW_BTM_FSM_CTRL_SHIFT);
2714
2715        if (goya) {
2716                goya->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
2717                                HW_CAP_DDR_0 | HW_CAP_DDR_1 |
2718                                HW_CAP_DMA | HW_CAP_MME |
2719                                HW_CAP_MMU | HW_CAP_TPC_MBIST |
2720                                HW_CAP_GOLDEN | HW_CAP_TPC);
2721
2722                memset(goya->events_stat, 0, sizeof(goya->events_stat));
2723        }
2724}
2725
2726int goya_suspend(struct hl_device *hdev)
2727{
2728        int rc;
2729
2730        rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
2731        if (rc)
2732                dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
2733
2734        return rc;
2735}
2736
2737int goya_resume(struct hl_device *hdev)
2738{
2739        return goya_init_iatu(hdev);
2740}
2741
2742static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
2743                        void *cpu_addr, dma_addr_t dma_addr, size_t size)
2744{
2745        int rc;
2746
2747        vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
2748                        VM_DONTCOPY | VM_NORESERVE;
2749
2750        rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
2751                                (dma_addr - HOST_PHYS_BASE), size);
2752        if (rc)
                dev_err(hdev->dev, "dma_mmap_coherent error %d\n", rc);
2754
2755        return rc;
2756}
2757
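/*
 * goya_ring_doorbell - Write the new PI of a H/W queue to its doorbell
 *
 * @hdev: pointer to hl_device structure
 * @hw_queue_id: index of the H/W queue
 * @pi: new value of the PI to write
 *
 */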
2758void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
2759{
2760        u32 db_reg_offset, db_value;
2761
2762        switch (hw_queue_id) {
2763        case GOYA_QUEUE_ID_DMA_0:
2764                db_reg_offset = mmDMA_QM_0_PQ_PI;
2765                break;
2766
2767        case GOYA_QUEUE_ID_DMA_1:
2768                db_reg_offset = mmDMA_QM_1_PQ_PI;
2769                break;
2770
2771        case GOYA_QUEUE_ID_DMA_2:
2772                db_reg_offset = mmDMA_QM_2_PQ_PI;
2773                break;
2774
2775        case GOYA_QUEUE_ID_DMA_3:
2776                db_reg_offset = mmDMA_QM_3_PQ_PI;
2777                break;
2778
2779        case GOYA_QUEUE_ID_DMA_4:
2780                db_reg_offset = mmDMA_QM_4_PQ_PI;
2781                break;
2782
2783        case GOYA_QUEUE_ID_CPU_PQ:
2784                db_reg_offset = mmCPU_IF_PF_PQ_PI;
2785                break;
2786
2787        case GOYA_QUEUE_ID_MME:
2788                db_reg_offset = mmMME_QM_PQ_PI;
2789                break;
2790
2791        case GOYA_QUEUE_ID_TPC0:
2792                db_reg_offset = mmTPC0_QM_PQ_PI;
2793                break;
2794
2795        case GOYA_QUEUE_ID_TPC1:
2796                db_reg_offset = mmTPC1_QM_PQ_PI;
2797                break;
2798
2799        case GOYA_QUEUE_ID_TPC2:
2800                db_reg_offset = mmTPC2_QM_PQ_PI;
2801                break;
2802
2803        case GOYA_QUEUE_ID_TPC3:
2804                db_reg_offset = mmTPC3_QM_PQ_PI;
2805                break;
2806
2807        case GOYA_QUEUE_ID_TPC4:
2808                db_reg_offset = mmTPC4_QM_PQ_PI;
2809                break;
2810
2811        case GOYA_QUEUE_ID_TPC5:
2812                db_reg_offset = mmTPC5_QM_PQ_PI;
2813                break;
2814
2815        case GOYA_QUEUE_ID_TPC6:
2816                db_reg_offset = mmTPC6_QM_PQ_PI;
2817                break;
2818
2819        case GOYA_QUEUE_ID_TPC7:
2820                db_reg_offset = mmTPC7_QM_PQ_PI;
2821                break;
2822
2823        default:
2824                /* Should never get here */
2825                dev_err(hdev->dev, "H/W queue %d is invalid. Can't set pi\n",
2826                        hw_queue_id);
2827                return;
2828        }
2829
2830        db_value = pi;
2831
2832        /* ring the doorbell */
2833        WREG32(db_reg_offset, db_value);
2834
2835        if (hw_queue_id == GOYA_QUEUE_ID_CPU_PQ) {
2836                /* make sure device CPU will read latest data from host */
2837                mb();
2838                WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2839                                GOYA_ASYNC_EVENT_ID_PI_UPDATE);
2840        }
2841}
2842
2843void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
2844{
        /* The QMANs are in the SRAM so we need to copy to IO space */
2846        memcpy_toio((void __iomem *) pqe, bd, sizeof(struct hl_bd));
2847}
2848
2849static void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size,
2850                                        dma_addr_t *dma_handle, gfp_t flags)
2851{
2852        void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
2853                                                dma_handle, flags);
2854
2855        /* Shift to the device's base physical address of host memory */
2856        if (kernel_addr)
2857                *dma_handle += HOST_PHYS_BASE;
2858
2859        return kernel_addr;
2860}
2861
2862static void goya_dma_free_coherent(struct hl_device *hdev, size_t size,
2863                                        void *cpu_addr, dma_addr_t dma_handle)
2864{
2865        /* Cancel the device's base physical address of host memory */
2866        dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
2867
2868        dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
2869}
2870
2871int goya_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
2872{
2873        return 0;
2874}
2875
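/*
 * goya_get_int_queue_base - Get the base address and length of an internal
 *                           queue
 *
 * @hdev: pointer to hl_device structure
 * @queue_id: index of the internal H/W queue
 * @dma_handle: pointer for the returned device address of the queue
 * @queue_len: pointer for the returned length of the queue
 *
 */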
2876void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
2877                                dma_addr_t *dma_handle, u16 *queue_len)
2878{
2879        void *base;
2880        u32 offset;
2881
2882        *dma_handle = hdev->asic_prop.sram_base_address;
2883
2884        base = (void *) hdev->pcie_bar[SRAM_CFG_BAR_ID];
2885
2886        switch (queue_id) {
2887        case GOYA_QUEUE_ID_MME:
2888                offset = MME_QMAN_BASE_OFFSET;
2889                *queue_len = MME_QMAN_LENGTH;
2890                break;
2891        case GOYA_QUEUE_ID_TPC0:
2892                offset = TPC0_QMAN_BASE_OFFSET;
2893                *queue_len = TPC_QMAN_LENGTH;
2894                break;
2895        case GOYA_QUEUE_ID_TPC1:
2896                offset = TPC1_QMAN_BASE_OFFSET;
2897                *queue_len = TPC_QMAN_LENGTH;
2898                break;
2899        case GOYA_QUEUE_ID_TPC2:
2900                offset = TPC2_QMAN_BASE_OFFSET;
2901                *queue_len = TPC_QMAN_LENGTH;
2902                break;
2903        case GOYA_QUEUE_ID_TPC3:
2904                offset = TPC3_QMAN_BASE_OFFSET;
2905                *queue_len = TPC_QMAN_LENGTH;
2906                break;
2907        case GOYA_QUEUE_ID_TPC4:
2908                offset = TPC4_QMAN_BASE_OFFSET;
2909                *queue_len = TPC_QMAN_LENGTH;
2910                break;
2911        case GOYA_QUEUE_ID_TPC5:
2912                offset = TPC5_QMAN_BASE_OFFSET;
2913                *queue_len = TPC_QMAN_LENGTH;
2914                break;
2915        case GOYA_QUEUE_ID_TPC6:
2916                offset = TPC6_QMAN_BASE_OFFSET;
2917                *queue_len = TPC_QMAN_LENGTH;
2918                break;
2919        case GOYA_QUEUE_ID_TPC7:
2920                offset = TPC7_QMAN_BASE_OFFSET;
2921                *queue_len = TPC_QMAN_LENGTH;
2922                break;
2923        default:
2924                dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
2925                return NULL;
2926        }
2927
2928        base += offset;
2929        *dma_handle += offset;
2930
2931        return base;
2932}
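
/*
 * Usage sketch (hypothetical): fetching the internal MME queue base.
 * The returned pointer lies inside the SRAM BAR mapping, while
 * *dma_handle is the matching device-side SRAM address; both carry the
 * same per-queue offset.
 */
static void __maybe_unused goya_int_queue_example(struct hl_device *hdev)
{
        dma_addr_t q_dma_addr;
        void *q_base;
        u16 q_len;

        q_base = goya_get_int_queue_base(hdev, GOYA_QUEUE_ID_MME,
                                                &q_dma_addr, &q_len);
        if (!q_base)
                return;

        dev_dbg(hdev->dev, "MME queue at %p (device 0x%llx), len %u\n",
                q_base, (unsigned long long) q_dma_addr, q_len);
}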
2933
2934static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
2935{
2936        struct packet_msg_prot *fence_pkt;
2937        u32 *fence_ptr;
2938        dma_addr_t fence_dma_addr;
2939        struct hl_cb *cb;
2940        u32 tmp, timeout;
2941        int rc;
2942
2943        if (hdev->pldm)
2944                timeout = GOYA_PLDM_QMAN0_TIMEOUT_USEC;
2945        else
2946                timeout = HL_DEVICE_TIMEOUT_USEC;
2947
2948        if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
2949                dev_err_ratelimited(hdev->dev,
2950                        "Can't send driver job on QMAN0 because the device is not idle\n");
2951                return -EBUSY;
2952        }
2953
2954        fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
2955                                                        &fence_dma_addr);
2956        if (!fence_ptr) {
2957                dev_err(hdev->dev,
2958                        "Failed to allocate fence memory for QMAN0\n");
2959                return -ENOMEM;
2960        }
2961
2962        goya_qman0_set_security(hdev, true);
2963
2964        cb = job->patched_cb;
2965
2966        fence_pkt = cb->kernel_address +
2967                        job->job_cb_size - sizeof(struct packet_msg_prot);
2968
2969        tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
2970                        (1 << GOYA_PKT_CTL_EB_SHIFT) |
2971                        (1 << GOYA_PKT_CTL_MB_SHIFT);
2972        fence_pkt->ctl = cpu_to_le32(tmp);
2973        fence_pkt->value = cpu_to_le32(GOYA_QMAN0_FENCE_VAL);
2974        fence_pkt->addr = cpu_to_le64(fence_dma_addr);
2975
2976        rc = hl_hw_queue_send_cb_no_cmpl(hdev, GOYA_QUEUE_ID_DMA_0,
2977                                        job->job_cb_size, cb->bus_address);
2978        if (rc) {
2979                dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
2980                goto free_fence_ptr;
2981        }
2982
2983        rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
2984                                (tmp == GOYA_QMAN0_FENCE_VAL), 1000,
2985                                timeout, true);
2986
2987        hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_DMA_0);
2988
2989        if (rc == -ETIMEDOUT) {
2990                dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
2991                goto free_fence_ptr;
2992        }
2993
2994free_fence_ptr:
2995        hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
2996                                        fence_dma_addr);
2997
2998        goya_qman0_set_security(hdev, false);
2999
3000        return rc;
3001}
3002
3003int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
3004                                u32 timeout, u64 *result)
3005{
3006        struct goya_device *goya = hdev->asic_specific;
3007
3008        if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) {
3009                if (result)
3010                        *result = 0;
3011                return 0;
3012        }
3013
3014        if (!timeout)
3015                timeout = GOYA_MSG_TO_CPU_TIMEOUT_USEC;
3016
3017        return hl_fw_send_cpu_message(hdev, GOYA_QUEUE_ID_CPU_PQ, msg, len,
3018                                        timeout, result);
3019}
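
/*
 * Usage sketch (hypothetical caller): passing a timeout of 0 selects the
 * default GOYA_MSG_TO_CPU_TIMEOUT_USEC, and if the CPU queue capability
 * isn't set yet the call silently succeeds with *result == 0.
 */
static int __maybe_unused goya_cpu_msg_example(struct hl_device *hdev,
                                                u32 *msg, u16 len)
{
        u64 result;

        return goya_send_cpu_message(hdev, msg, len, 0, &result);
}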
3020
3021int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
3022{
3023        struct packet_msg_prot *fence_pkt;
3024        dma_addr_t pkt_dma_addr;
3025        u32 fence_val, tmp;
3026        dma_addr_t fence_dma_addr;
3027        u32 *fence_ptr;
3028        int rc;
3029
3030        fence_val = GOYA_QMAN0_FENCE_VAL;
3031
3032        fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
3033                                                        &fence_dma_addr);
3034        if (!fence_ptr) {
3035                dev_err(hdev->dev,
3036                        "Failed to allocate memory for H/W queue %d testing\n",
3037                        hw_queue_id);
3038                return -ENOMEM;
3039        }
3040
3041        *fence_ptr = 0;
3042
3043        fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
3044                                        sizeof(struct packet_msg_prot),
3045                                        GFP_KERNEL, &pkt_dma_addr);
3046        if (!fence_pkt) {
3047                dev_err(hdev->dev,
3048                        "Failed to allocate packet for H/W queue %d testing\n",
3049                        hw_queue_id);
3050                rc = -ENOMEM;
3051                goto free_fence_ptr;
3052        }
3053
3054        tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
3055                        (1 << GOYA_PKT_CTL_EB_SHIFT) |
3056                        (1 << GOYA_PKT_CTL_MB_SHIFT);
3057        fence_pkt->ctl = cpu_to_le32(tmp);
3058        fence_pkt->value = cpu_to_le32(fence_val);
3059        fence_pkt->addr = cpu_to_le64(fence_dma_addr);
3060
3061        rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
3062                                        sizeof(struct packet_msg_prot),
3063                                        pkt_dma_addr);
3064        if (rc) {
3065                dev_err(hdev->dev,
3066                        "Failed to send fence packet to H/W queue %d\n",
3067                        hw_queue_id);
3068                goto free_pkt;
3069        }
3070
3071        rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
3072                                        1000, GOYA_TEST_QUEUE_WAIT_USEC, true);
3073
3074        hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
3075
3076        if (rc == -ETIMEDOUT) {
3077                dev_err(hdev->dev,
3078                        "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
3079                        hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
3080                rc = -EIO;
3081        }
3082
3083free_pkt:
3084        hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
3085                                        pkt_dma_addr);
3086free_fence_ptr:
3087        hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
3088                                        fence_dma_addr);
3089        return rc;
3090}
3091
3092int goya_test_cpu_queue(struct hl_device *hdev)
3093{
3094        struct goya_device *goya = hdev->asic_specific;
3095
3096        /*
3097         * Check the capability here because send_cpu_message() won't
3098         * update the result value if the CPU queue isn't available
3099         */
3100        if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
3101                return 0;
3102
3103        return hl_fw_test_cpu_queue(hdev);
3104}
3105
3106int goya_test_queues(struct hl_device *hdev)
3107{
3108        int i, rc, ret_val = 0;
3109
3110        for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
3111                rc = goya_test_queue(hdev, i);
3112                if (rc)
3113                        ret_val = -EINVAL;
3114        }
3115
3116        return ret_val;
3117}
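
/*
 * Usage sketch (hypothetical post-init sanity check): note that
 * goya_test_queues() folds every per-queue failure into a single
 * -EINVAL; the id of the failing queue is only visible in the log
 * printed by goya_test_queue().
 */
static int __maybe_unused goya_queues_sanity_example(struct hl_device *hdev)
{
        int rc;

        rc = goya_test_queues(hdev);
        if (rc)
                dev_err(hdev->dev, "H/W queues sanity check failed\n");

        return rc;
}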
3118
3119static void *goya_dma_pool_zalloc(struct hl_device *hdev, size_t size,
3120                                        gfp_t mem_flags, dma_addr_t *dma_handle)
3121{
3122        void *kernel_addr;
3123
3124        if (size > GOYA_DMA_POOL_BLK_SIZE)
3125                return NULL;
3126
3127        kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
3128
3129        /* Shift to the device's base physical address of host memory */
3130        if (kernel_addr)
3131                *dma_handle += HOST_PHYS_BASE;
3132
3133        return kernel_addr;
3134}
3135
3136static void goya_dma_pool_free(struct hl_device *hdev, void *vaddr,
3137                                dma_addr_t dma_addr)
3138{
3139        /* Cancel the device's base physical address of host memory */
3140        dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
3141
3142        dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
3143}
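
/*
 * Usage sketch (hypothetical): the pool only serves small allocations.
 * Requests above GOYA_DMA_POOL_BLK_SIZE (256 bytes) return NULL, and a
 * successful allocation hands back a device-visible handle that must be
 * passed unmodified to goya_dma_pool_free().
 */
static void __maybe_unused goya_dma_pool_example(struct hl_device *hdev)
{
        dma_addr_t dma_addr;
        void *va;

        va = goya_dma_pool_zalloc(hdev, 64, GFP_KERNEL, &dma_addr);
        if (!va)
                return;

        goya_dma_pool_free(hdev, va, dma_addr);
}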
3144
3145void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
3146                                        dma_addr_t *dma_handle)
3147{
3148        void *vaddr;
3149
3150        vaddr = hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
3151        *dma_handle = (*dma_handle) - hdev->cpu_accessible_dma_address +
3152                        VA_CPU_ACCESSIBLE_MEM_ADDR;
3153
3154        return vaddr;
3155}
3156
3157void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
3158                                        void *vaddr)
3159{
3160        hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
3161}
3162
3163static int goya_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
3164                                int nents, enum dma_data_direction dir)
3165{
3166        struct scatterlist *sg;
3167        int i;
3168
3169        if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
3170                return -ENOMEM;
3171
3172        /* Shift to the device's base physical address of host memory */
3173        for_each_sg(sgl, sg, nents, i)
3174                sg->dma_address += HOST_PHYS_BASE;
3175
3176        return 0;
3177}
3178
3179static void goya_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
3180                                int nents, enum dma_data_direction dir)
3181{
3182        struct scatterlist *sg;
3183        int i;
3184
3185        /* Cancel the device's base physical address of host memory */
3186        for_each_sg(sgl, sg, nents, i)
3187                sg->dma_address -= HOST_PHYS_BASE;
3188
3189        dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
3190}
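
/*
 * Usage sketch (hypothetical): map/unmap must be kept symmetric. After
 * goya_dma_map_sg() every sg entry's dma_address is device-visible
 * (HOST_PHYS_BASE added), and goya_dma_unmap_sg() cancels the shift
 * before handing the list back to the DMA API.
 */
static int __maybe_unused goya_dma_sg_example(struct hl_device *hdev,
                                                struct sg_table *sgt)
{
        int rc;

        rc = goya_dma_map_sg(hdev, sgt->sgl, sgt->nents, DMA_TO_DEVICE);
        if (rc)
                return rc;

        goya_dma_unmap_sg(hdev, sgt->sgl, sgt->nents, DMA_TO_DEVICE);

        return 0;
}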
3191
3192u32 goya_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
3193{
3194        struct scatterlist *sg, *sg_next_iter;
3195        u32 count, dma_desc_cnt;
3196        u64 len, len_next;
3197        dma_addr_t addr, addr_next;
3198
3199        dma_desc_cnt = 0;
3200
3201        for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3202
3203                len = sg_dma_len(sg);
3204                addr = sg_dma_address(sg);
3205
3206                if (len == 0)
3207                        break;
3208
3209                while ((count + 1) < sgt->nents) {
3210                        sg_next_iter = sg_next(sg);
3211                        len_next = sg_dma_len(sg_next_iter);
3212                        addr_next = sg_dma_address(sg_next_iter);
3213
3214                        if (len_next == 0)
3215                                break;
3216
3217                        if ((addr + len == addr_next) &&
3218                                (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3219                                len += len_next;
3220                                count++;
3221                                sg = sg_next_iter;
3222                        } else {
3223                                break;
3224                        }
3225                }
3226
3227                dma_desc_cnt++;
3228        }
3229
3230        return dma_desc_cnt * sizeof(struct packet_lin_dma);
3231}
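
/*
 * Worked example (hypothetical SG table) for the size calculation above:
 *
 *      [0] dma_address 0x1000, len 0x1000
 *      [1] dma_address 0x2000, len 0x1000   (contiguous with [0])
 *      [2] dma_address 0x8000, len 0x1000   (gap before this entry)
 *
 * Entries [0] and [1] are merged into one descriptor because the second
 * starts exactly where the first ends and the combined length stays
 * under DMA_MAX_TRANSFER_SIZE. Entry [2] needs its own descriptor, so
 * the function returns 2 * sizeof(struct packet_lin_dma).
 */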
3232
3233static int goya_pin_memory_before_cs(struct hl_device *hdev,
3234                                struct hl_cs_parser *parser,
3235                                struct packet_lin_dma *user_dma_pkt,
3236                                u64 addr, enum dma_data_direction dir)
3237{
3238        struct hl_userptr *userptr;
3239        int rc;
3240
3241        if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3242                        parser->job_userptr_list, &userptr))
3243                goto already_pinned;
3244
3245        userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
3246        if (!userptr)
3247                return -ENOMEM;
3248
3249        rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3250                                userptr);
3251        if (rc)
3252                goto free_userptr;
3253
3254        list_add_tail(&userptr->job_node, parser->job_userptr_list);
3255
3256        rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
3257                                        userptr->sgt->nents, dir);
3258        if (rc) {
3259                dev_err(hdev->dev, "failed to map sgt with DMA region\n");
3260                goto unpin_memory;
3261        }
3262
3263        userptr->dma_mapped = true;
3264        userptr->dir = dir;
3265
3266already_pinned:
3267        parser->patched_cb_size +=
3268                        goya_get_dma_desc_list_size(hdev, userptr->sgt);
3269
3270        return 0;
3271
3272unpin_memory:
3273        list_del(&userptr->job_node);
3274        hl_unpin_host_memory(hdev, userptr);
3275free_userptr:
3276        kfree(userptr);
3277        return rc;
3278}
3279
3280static int goya_validate_dma_pkt_host(struct hl_device *hdev,
3281                                struct hl_cs_parser *parser,
3282                                struct packet_lin_dma *user_dma_pkt)
3283{
3284        u64 device_memory_addr, addr;
3285        enum dma_data_direction dir;
3286        enum goya_dma_direction user_dir;
3287        bool sram_addr = true;
3288        bool skip_host_mem_pin = false;
3289        bool user_memset;
3290        u32 ctl;
3291        int rc = 0;
3292
3293        ctl = le32_to_cpu(user_dma_pkt->ctl);
3294
3295        user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3296                        GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3297
3298        user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3299                        GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3300
3301        switch (user_dir) {
3302        case DMA_HOST_TO_DRAM:
3303                dev_dbg(hdev->dev, "DMA direction is HOST --> DRAM\n");
3304                dir = DMA_TO_DEVICE;
3305                sram_addr = false;
3306                addr = le64_to_cpu(user_dma_pkt->src_addr);
3307                device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3308                if (user_memset)
3309                        skip_host_mem_pin = true;
3310                break;
3311
3312        case DMA_DRAM_TO_HOST:
3313                dev_dbg(hdev->dev, "DMA direction is DRAM --> HOST\n");
3314                dir = DMA_FROM_DEVICE;
3315                sram_addr = false;
3316                addr = le64_to_cpu(user_dma_pkt->dst_addr);
3317                device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3318                break;
3319
3320        case DMA_HOST_TO_SRAM:
3321                dev_dbg(hdev->dev, "DMA direction is HOST --> SRAM\n");
3322                dir = DMA_TO_DEVICE;
3323                addr = le64_to_cpu(user_dma_pkt->src_addr);
3324                device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3325                if (user_memset)
3326                        skip_host_mem_pin = true;
3327                break;
3328
3329        case DMA_SRAM_TO_HOST:
3330                dev_dbg(hdev->dev, "DMA direction is SRAM --> HOST\n");
3331                dir = DMA_FROM_DEVICE;
3332                addr = le64_to_cpu(user_dma_pkt->dst_addr);
3333                device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3334                break;
3335        default:
3336                dev_err(hdev->dev, "DMA direction is undefined\n");
3337                return -EFAULT;
3338        }
3339
3340        if (sram_addr) {
3341                if (!hl_mem_area_inside_range(device_memory_addr,
3342                                le32_to_cpu(user_dma_pkt->tsize),
3343                                hdev->asic_prop.sram_user_base_address,
3344                                hdev->asic_prop.sram_end_address)) {
3345
3346                        dev_err(hdev->dev,
3347                                "SRAM address 0x%llx + 0x%x is invalid\n",
3348                                device_memory_addr,
3349                                le32_to_cpu(user_dma_pkt->tsize));
3350                        return -EFAULT;
3351                }
3352        } else {
3353                if (!hl_mem_area_inside_range(device_memory_addr,
3354                                le32_to_cpu(user_dma_pkt->tsize),
3355                                hdev->asic_prop.dram_user_base_address,
3356                                hdev->asic_prop.dram_end_address)) {
3357
3358                        dev_err(hdev->dev,
3359                                "DRAM address 0x%llx + 0x%x is invalid\n",
3360                                device_memory_addr,
3361                                le32_to_cpu(user_dma_pkt->tsize));
3362                        return -EFAULT;
3363                }
3364        }
3365
3366        if (skip_host_mem_pin)
3367                parser->patched_cb_size += sizeof(*user_dma_pkt);
3368        else {
3369                if ((dir == DMA_TO_DEVICE) &&
3370                                (parser->hw_queue_id > GOYA_QUEUE_ID_DMA_1)) {
3371                        dev_err(hdev->dev,
3372                                "Can't DMA from host on queue other than 1\n");
3373                        return -EFAULT;
3374                }
3375
3376                rc = goya_pin_memory_before_cs(hdev, parser, user_dma_pkt,
3377                                                addr, dir);
3378        }
3379
3380        return rc;
3381}
3382
3383static int goya_validate_dma_pkt_no_host(struct hl_device *hdev,
3384                                struct hl_cs_parser *parser,
3385                                struct packet_lin_dma *user_dma_pkt)
3386{
3387        u64 sram_memory_addr, dram_memory_addr;
3388        enum goya_dma_direction user_dir;
3389        u32 ctl;
3390
3391        ctl = le32_to_cpu(user_dma_pkt->ctl);
3392        user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3393                        GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3394
3395        if (user_dir == DMA_DRAM_TO_SRAM) {
3396                dev_dbg(hdev->dev, "DMA direction is DRAM --> SRAM\n");
3397                dram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3398                sram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3399        } else {
3400                dev_dbg(hdev->dev, "DMA direction is SRAM --> DRAM\n");
3401                sram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3402                dram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3403        }
3404
3405        if (!hl_mem_area_inside_range(sram_memory_addr,
3406                                le32_to_cpu(user_dma_pkt->tsize),
3407                                hdev->asic_prop.sram_user_base_address,
3408                                hdev->asic_prop.sram_end_address)) {
3409                dev_err(hdev->dev, "SRAM address 0x%llx + 0x%x is invalid\n",
3410                        sram_memory_addr, le32_to_cpu(user_dma_pkt->tsize));
3411                return -EFAULT;
3412        }
3413
3414        if (!hl_mem_area_inside_range(dram_memory_addr,
3415                                le32_to_cpu(user_dma_pkt->tsize),
3416                                hdev->asic_prop.dram_user_base_address,
3417                                hdev->asic_prop.dram_end_address)) {
3418                dev_err(hdev->dev, "DRAM address 0x%llx + 0x%x is invalid\n",
3419                        dram_memory_addr, le32_to_cpu(user_dma_pkt->tsize));
3420                return -EFAULT;
3421        }
3422
3423        parser->patched_cb_size += sizeof(*user_dma_pkt);
3424
3425        return 0;
3426}
3427
3428static int goya_validate_dma_pkt_no_mmu(struct hl_device *hdev,
3429                                struct hl_cs_parser *parser,
3430                                struct packet_lin_dma *user_dma_pkt)
3431{
3432        enum goya_dma_direction user_dir;
3433        u32 ctl;
3434        int rc;
3435
3436        dev_dbg(hdev->dev, "DMA packet details:\n");
3437        dev_dbg(hdev->dev, "source == 0x%llx\n",
3438                le64_to_cpu(user_dma_pkt->src_addr));
3439        dev_dbg(hdev->dev, "destination == 0x%llx\n",
3440                le64_to_cpu(user_dma_pkt->dst_addr));
3441        dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3442
3443        ctl = le32_to_cpu(user_dma_pkt->ctl);
3444        user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3445                        GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3446
3447        /*
3448         * Special handling for DMA with size 0. The H/W has a bug where
3449         * this can cause the QMAN DMA to get stuck, so block it here.
3450         */
3451        if (user_dma_pkt->tsize == 0) {
3452                dev_err(hdev->dev,
3453                        "Got DMA with size 0, might reset the device\n");
3454                return -EINVAL;
3455        }
3456
3457        if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM))
3458                rc = goya_validate_dma_pkt_no_host(hdev, parser, user_dma_pkt);
3459        else
3460                rc = goya_validate_dma_pkt_host(hdev, parser, user_dma_pkt);
3461
3462        return rc;
3463}
3464
3465static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
3466                                struct hl_cs_parser *parser,
3467                                struct packet_lin_dma *user_dma_pkt)
3468{
3469        dev_dbg(hdev->dev, "DMA packet details:\n");
3470        dev_dbg(hdev->dev, "source == 0x%llx\n",
3471                le64_to_cpu(user_dma_pkt->src_addr));
3472        dev_dbg(hdev->dev, "destination == 0x%llx\n",
3473                le64_to_cpu(user_dma_pkt->dst_addr));
3474        dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3475
3476        /*
3477         * WA for HW-23.
3478         * We can't allow user to read from Host using QMANs other than 1.
3479         * PMMU and HPMMU addresses are equal, check only one of them.
3480         */
3481        if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 &&
3482                hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr),
3483                                le32_to_cpu(user_dma_pkt->tsize),
3484                                hdev->asic_prop.pmmu.start_addr,
3485                                hdev->asic_prop.pmmu.end_addr)) {
3486                dev_err(hdev->dev,
3487                        "Can't DMA from host on queue other than 1\n");
3488                return -EFAULT;
3489        }
3490
3491        if (user_dma_pkt->tsize == 0) {
3492                dev_err(hdev->dev,
3493                        "Got DMA with size 0, might reset the device\n");
3494                return -EINVAL;
3495        }
3496
3497        parser->patched_cb_size += sizeof(*user_dma_pkt);
3498
3499        return 0;
3500}
3501
3502static int goya_validate_wreg32(struct hl_device *hdev,
3503                                struct hl_cs_parser *parser,
3504                                struct packet_wreg32 *wreg_pkt)
3505{
3506        struct goya_device *goya = hdev->asic_specific;
3507        u32 sob_start_addr, sob_end_addr;
3508        u16 reg_offset;
3509
3510        reg_offset = le32_to_cpu(wreg_pkt->ctl) &
3511                        GOYA_PKT_WREG32_CTL_REG_OFFSET_MASK;
3512
3513        dev_dbg(hdev->dev, "WREG32 packet details:\n");
3514        dev_dbg(hdev->dev, "reg_offset == 0x%x\n", reg_offset);
3515        dev_dbg(hdev->dev, "value      == 0x%x\n",
3516                le32_to_cpu(wreg_pkt->value));
3517
3518        if (reg_offset != (mmDMA_CH_0_WR_COMP_ADDR_LO & 0x1FFF)) {
3519                dev_err(hdev->dev, "WREG32 packet with illegal address 0x%x\n",
3520                        reg_offset);
3521                return -EPERM;
3522        }
3523
3524        /*
3525         * With MMU, DMA channels are not secured, so it doesn't matter where
3526         * the WR COMP will be written to because it will go out with
3527         * non-secured property
3528         */
3529        if (goya->hw_cap_initialized & HW_CAP_MMU)
3530                return 0;
3531
3532        sob_start_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
3533        sob_end_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1023);
3534
3535        if ((le32_to_cpu(wreg_pkt->value) < sob_start_addr) ||
3536                        (le32_to_cpu(wreg_pkt->value) > sob_end_addr)) {
3537
3538                dev_err(hdev->dev, "WREG32 packet with illegal value 0x%x\n",
3539                        le32_to_cpu(wreg_pkt->value));
3540                return -EPERM;
3541        }
3542
3543        return 0;
3544}
3545
3546static int goya_validate_cb(struct hl_device *hdev,
3547                        struct hl_cs_parser *parser, bool is_mmu)
3548{
3549        u32 cb_parsed_length = 0;
3550        int rc = 0;
3551
3552        parser->patched_cb_size = 0;
3553
3554        /* user_cb_size is more than 0 so the loop will always execute */
3555        while (cb_parsed_length < parser->user_cb_size) {
3556                enum packet_id pkt_id;
3557                u16 pkt_size;
3558                struct goya_packet *user_pkt;
3559
3560                user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
3561
3562                pkt_id = (enum packet_id) (
3563                                (le64_to_cpu(user_pkt->header) &
3564                                PACKET_HEADER_PACKET_ID_MASK) >>
3565                                        PACKET_HEADER_PACKET_ID_SHIFT);
3566
3567                if (!validate_packet_id(pkt_id)) {
3568                        dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
3569                        rc = -EINVAL;
3570                        break;
3571                }
3572
3573                pkt_size = goya_packet_sizes[pkt_id];
3574                cb_parsed_length += pkt_size;
3575                if (cb_parsed_length > parser->user_cb_size) {
3576                        dev_err(hdev->dev,
3577                                "packet 0x%x is out of CB boundary\n", pkt_id);
3578                        rc = -EINVAL;
3579                        break;
3580                }
3581
3582                switch (pkt_id) {
3583                case PACKET_WREG_32:
3584                        /*
3585                         * Although it is validated after copy in patch_cb(),
3586                         * need to validate here as well because patch_cb() is
3587                         * not called in MMU path while this function is called
3588                         */
3589                        rc = goya_validate_wreg32(hdev,
3590                                parser, (struct packet_wreg32 *) user_pkt);
3591                        parser->patched_cb_size += pkt_size;
3592                        break;
3593
3594                case PACKET_WREG_BULK:
3595                        dev_err(hdev->dev,
3596                                "User not allowed to use WREG_BULK\n");
3597                        rc = -EPERM;
3598                        break;
3599
3600                case PACKET_MSG_PROT:
3601                        dev_err(hdev->dev,
3602                                "User not allowed to use MSG_PROT\n");
3603                        rc = -EPERM;
3604                        break;
3605
3606                case PACKET_CP_DMA:
3607                        dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3608                        rc = -EPERM;
3609                        break;
3610
3611                case PACKET_STOP:
3612                        dev_err(hdev->dev, "User not allowed to use STOP\n");
3613                        rc = -EPERM;
3614                        break;
3615
3616                case PACKET_LIN_DMA:
3617                        if (is_mmu)
3618                                rc = goya_validate_dma_pkt_mmu(hdev, parser,
3619                                        (struct packet_lin_dma *) user_pkt);
3620                        else
3621                                rc = goya_validate_dma_pkt_no_mmu(hdev, parser,
3622                                        (struct packet_lin_dma *) user_pkt);
3623                        break;
3624
3625                case PACKET_MSG_LONG:
3626                case PACKET_MSG_SHORT:
3627                case PACKET_FENCE:
3628                case PACKET_NOP:
3629                        parser->patched_cb_size += pkt_size;
3630                        break;
3631
3632                default:
3633                        dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3634                                pkt_id);
3635                        rc = -EINVAL;
3636                        break;
3637                }
3638
3639                if (rc)
3640                        break;
3641        }
3642
3643        /*
3644         * The new CB should have space at the end for two MSG_PROT packets:
3645         * 1. A packet that will act as a completion packet
3646         * 2. A packet that will generate MSI-X interrupt
3647         */
3648        parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
3649
3650        return rc;
3651}
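
/*
 * Decode sketch (hypothetical helper, not used by the driver): both the
 * validation loop above and the patching loop below extract the packet
 * id the same way, from the first QWORD of each packet.
 */
static enum packet_id __maybe_unused goya_get_pkt_id(struct goya_packet *pkt)
{
        return (enum packet_id) ((le64_to_cpu(pkt->header) &
                        PACKET_HEADER_PACKET_ID_MASK) >>
                                PACKET_HEADER_PACKET_ID_SHIFT);
}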
3652
3653static int goya_patch_dma_packet(struct hl_device *hdev,
3654                                struct hl_cs_parser *parser,
3655                                struct packet_lin_dma *user_dma_pkt,
3656                                struct packet_lin_dma *new_dma_pkt,
3657                                u32 *new_dma_pkt_size)
3658{
3659        struct hl_userptr *userptr;
3660        struct scatterlist *sg, *sg_next_iter;
3661        u32 count, dma_desc_cnt;
3662        u64 len, len_next;
3663        dma_addr_t dma_addr, dma_addr_next;
3664        enum goya_dma_direction user_dir;
3665        u64 device_memory_addr, addr;
3666        enum dma_data_direction dir;
3667        struct sg_table *sgt;
3668        bool skip_host_mem_pin = false;
3669        bool user_memset;
3670        u32 user_rdcomp_mask, user_wrcomp_mask, ctl;
3671
3672        ctl = le32_to_cpu(user_dma_pkt->ctl);
3673
3674        user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3675                        GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3676
3677        user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3678                        GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3679
3680        if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM) ||
3681                        (user_dma_pkt->tsize == 0)) {
3682                memcpy(new_dma_pkt, user_dma_pkt, sizeof(*new_dma_pkt));
3683                *new_dma_pkt_size = sizeof(*new_dma_pkt);
3684                return 0;
3685        }
3686
3687        if ((user_dir == DMA_HOST_TO_DRAM) || (user_dir == DMA_HOST_TO_SRAM)) {
3688                addr = le64_to_cpu(user_dma_pkt->src_addr);
3689                device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3690                dir = DMA_TO_DEVICE;
3691                if (user_memset)
3692                        skip_host_mem_pin = true;
3693        } else {
3694                addr = le64_to_cpu(user_dma_pkt->dst_addr);
3695                device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3696                dir = DMA_FROM_DEVICE;
3697        }
3698
3699        if ((!skip_host_mem_pin) &&
3700                (!hl_userptr_is_pinned(hdev, addr,
3701                        le32_to_cpu(user_dma_pkt->tsize),
3702                        parser->job_userptr_list, &userptr))) {
3703                dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
3704                                addr, le32_to_cpu(user_dma_pkt->tsize));
3705                return -EFAULT;
3706        }
3707
3708        if ((user_memset) && (dir == DMA_TO_DEVICE)) {
3709                memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
3710                *new_dma_pkt_size = sizeof(*user_dma_pkt);
3711                return 0;
3712        }
3713
3714        user_rdcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK;
3715
3716        user_wrcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK;
3717
3718        sgt = userptr->sgt;
3719        dma_desc_cnt = 0;
3720
3721        for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3722                len = sg_dma_len(sg);
3723                dma_addr = sg_dma_address(sg);
3724
3725                if (len == 0)
3726                        break;
3727
3728                while ((count + 1) < sgt->nents) {
3729                        sg_next_iter = sg_next(sg);
3730                        len_next = sg_dma_len(sg_next_iter);
3731                        dma_addr_next = sg_dma_address(sg_next_iter);
3732
3733                        if (len_next == 0)
3734                                break;
3735
3736                        if ((dma_addr + len == dma_addr_next) &&
3737                                (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3738                                len += len_next;
3739                                count++;
3740                                sg = sg_next_iter;
3741                        } else {
3742                                break;
3743                        }
3744                }
3745
3746                ctl = le32_to_cpu(user_dma_pkt->ctl);
3747                if (likely(dma_desc_cnt))
3748                        ctl &= ~GOYA_PKT_CTL_EB_MASK;
3749                ctl &= ~(GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK |
3750                                GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK);
3751                new_dma_pkt->ctl = cpu_to_le32(ctl);
3752                new_dma_pkt->tsize = cpu_to_le32((u32) len);
3753
3754                if (dir == DMA_TO_DEVICE) {
3755                        new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
3756                        new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
3757                } else {
3758                        new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
3759                        new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
3760                }
3761
3762                if (!user_memset)
3763                        device_memory_addr += len;
3764                dma_desc_cnt++;
3765                new_dma_pkt++;
3766        }
3767
3768        if (!dma_desc_cnt) {
3769                dev_err(hdev->dev,
3770                        "No SG entries found when patching DMA packet\n");
3771                return -EFAULT;
3772        }
3773
3774        /* Fix the last dma packet - rdcomp/wrcomp must be as user set them */
3775        new_dma_pkt--;
3776        new_dma_pkt->ctl |= cpu_to_le32(user_rdcomp_mask | user_wrcomp_mask);
3777
3778        *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
3779
3780        return 0;
3781}
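
/*
 * Patching example (hypothetical): a host-to-device LIN_DMA whose pinned
 * user buffer maps to two non-contiguous SG runs is rewritten as two
 * LIN_DMA packets. Only the first keeps the EB bit (the loop clears it
 * once dma_desc_cnt is non-zero) and only the last has the user's
 * rdcomp/wrcomp masks restored, so completion semantics match the
 * original single packet.
 */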
3782
3783static int goya_patch_cb(struct hl_device *hdev,
3784                                struct hl_cs_parser *parser)
3785{
3786        u32 cb_parsed_length = 0;
3787        u32 cb_patched_cur_length = 0;
3788        int rc = 0;
3789
3790        /* user_cb_size is more than 0 so the loop will always execute */
3791        while (cb_parsed_length < parser->user_cb_size) {
3792                enum packet_id pkt_id;
3793                u16 pkt_size;
3794                u32 new_pkt_size = 0;
3795                struct goya_packet *user_pkt, *kernel_pkt;
3796
3797                user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
3798                kernel_pkt = parser->patched_cb->kernel_address +
3799                                        cb_patched_cur_length;
3800
3801                pkt_id = (enum packet_id) (
3802                                (le64_to_cpu(user_pkt->header) &
3803                                PACKET_HEADER_PACKET_ID_MASK) >>
3804                                        PACKET_HEADER_PACKET_ID_SHIFT);
3805
3806                if (!validate_packet_id(pkt_id)) {
3807                        dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
3808                        rc = -EINVAL;
3809                        break;
3810                }
3811
3812                pkt_size = goya_packet_sizes[pkt_id];
3813                cb_parsed_length += pkt_size;
3814                if (cb_parsed_length > parser->user_cb_size) {
3815                        dev_err(hdev->dev,
3816                                "packet 0x%x is out of CB boundary\n", pkt_id);
3817                        rc = -EINVAL;
3818                        break;
3819                }
3820
3821                switch (pkt_id) {
3822                case PACKET_LIN_DMA:
3823                        rc = goya_patch_dma_packet(hdev, parser,
3824                                        (struct packet_lin_dma *) user_pkt,
3825                                        (struct packet_lin_dma *) kernel_pkt,
3826                                        &new_pkt_size);
3827                        cb_patched_cur_length += new_pkt_size;
3828                        break;
3829
3830                case PACKET_WREG_32:
3831                        memcpy(kernel_pkt, user_pkt, pkt_size);
3832                        cb_patched_cur_length += pkt_size;
3833                        rc = goya_validate_wreg32(hdev, parser,
3834                                        (struct packet_wreg32 *) kernel_pkt);
3835                        break;
3836
3837                case PACKET_WREG_BULK:
3838                        dev_err(hdev->dev,
3839                                "User not allowed to use WREG_BULK\n");
3840                        rc = -EPERM;
3841                        break;
3842
3843                case PACKET_MSG_PROT:
3844                        dev_err(hdev->dev,
3845                                "User not allowed to use MSG_PROT\n");
3846                        rc = -EPERM;
3847                        break;
3848
3849                case PACKET_CP_DMA:
3850                        dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3851                        rc = -EPERM;
3852                        break;
3853
3854                case PACKET_STOP:
3855                        dev_err(hdev->dev, "User not allowed to use STOP\n");
3856                        rc = -EPERM;
3857                        break;
3858
3859                case PACKET_MSG_LONG:
3860                case PACKET_MSG_SHORT:
3861                case PACKET_FENCE:
3862                case PACKET_NOP:
3863                        memcpy(kernel_pkt, user_pkt, pkt_size);
3864                        cb_patched_cur_length += pkt_size;
3865                        break;
3866
3867                default:
3868                        dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3869                                pkt_id);
3870                        rc = -EINVAL;
3871                        break;
3872                }
3873
3874                if (rc)
3875                        break;
3876        }
3877
3878        return rc;
3879}
3880
3881static int goya_parse_cb_mmu(struct hl_device *hdev,
3882                struct hl_cs_parser *parser)
3883{
3884        u64 patched_cb_handle;
3885        u32 patched_cb_size;
3886        struct hl_cb *user_cb;
3887        int rc;
3888
3889        /*
3890         * The new CB should have space at the end for two MSG_PROT packets:
3891         * 1. A packet that will act as a completion packet
3892         * 2. A packet that will generate MSI-X interrupt
3893         */
3894        parser->patched_cb_size = parser->user_cb_size +
3895                        sizeof(struct packet_msg_prot) * 2;
3896
3897        rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
3898                                parser->patched_cb_size, false, false,
3899                                &patched_cb_handle);
3900
3901        if (rc) {
3902                dev_err(hdev->dev,
3903                        "Failed to allocate patched CB for DMA CS %d\n",
3904                        rc);
3905                return rc;
3906        }
3907
3908        patched_cb_handle >>= PAGE_SHIFT;
3909        parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
3910                                (u32) patched_cb_handle);
3911        /* hl_cb_get should never fail here */
3912        if (!parser->patched_cb) {
3913                dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
3914                        (u32) patched_cb_handle);
3915                rc = -EFAULT;
3916                goto out;
3917        }
3918
3919        /*
3920         * The check that parser->user_cb_size <= parser->user_cb->size was done
3921         * in validate_queue_index().
3922         */
3923        memcpy(parser->patched_cb->kernel_address,
3924                parser->user_cb->kernel_address,
3925                parser->user_cb_size);
3926
3927        patched_cb_size = parser->patched_cb_size;
3928
3929        /* validate patched CB instead of user CB */
3930        user_cb = parser->user_cb;
3931        parser->user_cb = parser->patched_cb;
3932        rc = goya_validate_cb(hdev, parser, true);
3933        parser->user_cb = user_cb;
3934
3935        if (rc) {
3936                hl_cb_put(parser->patched_cb);
3937                goto out;
3938        }
3939
3940        if (patched_cb_size != parser->patched_cb_size) {
3941                dev_err(hdev->dev, "user CB size mismatch\n");
3942                hl_cb_put(parser->patched_cb);
3943                rc = -EINVAL;
3944                goto out;
3945        }
3946
3947out:
3948        /*
3949         * Always call cb destroy here because we still hold one reference
3950         * from the earlier cb_get. After the job is completed, cb_put will
3951         * release it, but here we want to remove the CB from the
3952         * idr.
3953         */
3954        hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
3955                                        patched_cb_handle << PAGE_SHIFT);
3956
3957        return rc;
3958}
3959
3960static int goya_parse_cb_no_mmu(struct hl_device *hdev,
3961                                struct hl_cs_parser *parser)
3962{
3963        u64 patched_cb_handle;
3964        int rc;
3965
3966        rc = goya_validate_cb(hdev, parser, false);
3967
3968        if (rc)
3969                goto free_userptr;
3970
3971        rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
3972                                parser->patched_cb_size, false, false,
3973                                &patched_cb_handle);
3974        if (rc) {
3975                dev_err(hdev->dev,
3976                        "Failed to allocate patched CB for DMA CS %d\n", rc);
3977                goto free_userptr;
3978        }
3979
3980        patched_cb_handle >>= PAGE_SHIFT;
3981        parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
3982                                (u32) patched_cb_handle);
3983        /* hl_cb_get should never fail here */
3984        if (!parser->patched_cb) {
3985                dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
3986                        (u32) patched_cb_handle);
3987                rc = -EFAULT;
3988                goto out;
3989        }
3990
3991        rc = goya_patch_cb(hdev, parser);
3992
3993        if (rc)
3994                hl_cb_put(parser->patched_cb);
3995
3996out:
3997        /*
3998         * Always call cb destroy here because we still hold one reference
3999         * from the earlier cb_get. After the job is completed, cb_put will
4000         * release it, but here we want to remove the CB from the
4001         * idr.
4002         */
4003        hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4004                                patched_cb_handle << PAGE_SHIFT);
4005
4006free_userptr:
4007        if (rc)
4008                hl_userptr_delete_list(hdev, parser->job_userptr_list);
4009        return rc;
4010}
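
/*
 * Dispatch sketch (hypothetical caller): with MMU enabled the user CB is
 * copied and validated in place, since addresses are translated by the
 * MMU and no DMA patching is needed, while without MMU the CB is first
 * validated and then patched packet by packet.
 */
static int __maybe_unused goya_parse_cb_example(struct hl_device *hdev,
                                struct hl_cs_parser *parser, bool mmu)
{
        if (mmu)
                return goya_parse_cb_mmu(hdev, parser);

        return goya_parse_cb_no_mmu(hdev, parser);
}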
4011
4012static int goya_parse_cb_no_ext_queue(struct hl_device *hdev,
4013                                        struct hl_cs_parser *parser)
4014{
4015        struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
4016        struct goya_device *goya = hdev->asic_specific;
4017
4018        if (goya->hw_cap_initialized & HW_CAP_MMU)
4019                return 0;
4020
4021        /* For internal queue jobs, just check if CB address is valid */
4022        if (hl_mem_area_inside_range(
4023                        (u64) (uintptr_t) parser->user_cb,
4024                        parser->user_cb_size,
4025                        asic_prop->sram_user_base_address,
4026                        asic_prop->sram_end_address))