linux/drivers/misc/habanalabs/gaudi/gaudi.c
   1// SPDX-License-Identifier: GPL-2.0
   2
   3/*
   4 * Copyright 2016-2020 HabanaLabs, Ltd.
   5 * All Rights Reserved.
   6 */
   7
   8#include "gaudiP.h"
   9#include "../include/hw_ip/mmu/mmu_general.h"
  10#include "../include/hw_ip/mmu/mmu_v1_1.h"
  11#include "../include/gaudi/gaudi_masks.h"
  12#include "../include/gaudi/gaudi_fw_if.h"
  13#include "../include/gaudi/gaudi_reg_map.h"
  14#include "../include/gaudi/gaudi_async_ids_map_extended.h"
  15
  16#include <linux/module.h>
  17#include <linux/pci.h>
  18#include <linux/firmware.h>
  19#include <linux/hwmon.h>
  20#include <linux/iommu.h>
  21#include <linux/seq_file.h>
  22
  23/*
  24 * Gaudi security scheme:
  25 *
  26 * 1. Host is protected by:
  27 *        - Range registers
  28 *        - MMU
  29 *
  30 * 2. DDR is protected by:
  31 *        - Range registers (protect the first 512MB)
  32 *
  33 * 3. Configuration is protected by:
  34 *        - Range registers
  35 *        - Protection bits
  36 *
  37 * MMU is always enabled.
  38 *
   39 * QMAN DMA channels 0,1 (PCI DMA):
  40 *     - DMA is not secured.
  41 *     - PQ and CQ are secured.
  42 *     - CP is secured: The driver needs to parse CB but WREG should be allowed
   43 *                      because of TDMA (tensor DMA). Hence, WREG is never
   44 *                      secured.
  45 *
  46 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
  47 * channel 0 to be secured, execute the DMA and change it back to not secured.
  48 * Currently, the driver doesn't use the DMA while there are compute jobs
  49 * running.
  50 *
  51 * The current use cases for the driver to use the DMA are:
   52 *     - Clear SRAM on context switch (happens only when the device is
   53 *       idle)
   54 *     - Clear the MMU page tables area (happens on init)
  55 *
  56 * QMAN DMA 2-7, TPC, MME, NIC:
  57 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
  58 * CQ, CP and the engine are not secured
  59 *
  60 */
  61
  62#define GAUDI_BOOT_FIT_FILE     "habanalabs/gaudi/gaudi-boot-fit.itb"
  63#define GAUDI_LINUX_FW_FILE     "habanalabs/gaudi/gaudi-fit.itb"
  64#define GAUDI_TPC_FW_FILE       "habanalabs/gaudi/gaudi_tpc.bin"
  65
  66#define GAUDI_DMA_POOL_BLK_SIZE         0x100 /* 256 bytes */
  67
  68#define GAUDI_RESET_TIMEOUT_MSEC        2000            /* 2000ms */
  69#define GAUDI_RESET_WAIT_MSEC           1               /* 1ms */
  70#define GAUDI_CPU_RESET_WAIT_MSEC       200             /* 200ms */
  71#define GAUDI_TEST_QUEUE_WAIT_USEC      100000          /* 100ms */
  72
  73#define GAUDI_PLDM_RESET_WAIT_MSEC      1000            /* 1s */
  74#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC  20000           /* 20s */
  75#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000         /* 1s */
  76#define GAUDI_PLDM_MMU_TIMEOUT_USEC     (MMU_CONFIG_TIMEOUT_USEC * 100)
  77#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC   (HL_DEVICE_TIMEOUT_USEC * 30)
  78#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
  79#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 1000000         /* 1s */
  80#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC   4000000         /* 4s */
  81#define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC  15000000        /* 15s */
  82
  83#define GAUDI_QMAN0_FENCE_VAL           0x72E91AB9
  84
  85#define GAUDI_MAX_STRING_LEN            20
  86
  87#define GAUDI_CB_POOL_CB_CNT            512
  88#define GAUDI_CB_POOL_CB_SIZE           0x20000 /* 128KB */
  89
  90#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT   3
  91
  92#define GAUDI_NUM_OF_TPC_INTR_CAUSE     20
  93
  94#define GAUDI_NUM_OF_QM_ERR_CAUSE       16
  95
  96#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE   3
  97
  98#define GAUDI_ARB_WDT_TIMEOUT           0x1000000
  99
 100#define GAUDI_CLK_GATE_DEBUGFS_MASK     (\
 101                BIT(GAUDI_ENGINE_ID_MME_0) |\
 102                BIT(GAUDI_ENGINE_ID_MME_2) |\
 103                GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
 104
 105#define HBM_SCRUBBING_TIMEOUT_US        1000000 /* 1s */
 106
 107#define GAUDI_PLL_MAX 10
 108
 109static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
 110                "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
 111                "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
 112                "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
 113                "gaudi cpu eq"
 114};
 115
 116static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
 117        [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
 118        [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
 119        [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
 120        [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
 121        [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
 122        [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
 123        [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
 124        [GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
 125};
 126
 127static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
 128        [0] = GAUDI_QUEUE_ID_DMA_0_0,
 129        [1] = GAUDI_QUEUE_ID_DMA_0_1,
 130        [2] = GAUDI_QUEUE_ID_DMA_0_2,
 131        [3] = GAUDI_QUEUE_ID_DMA_0_3,
 132        [4] = GAUDI_QUEUE_ID_DMA_1_0,
 133        [5] = GAUDI_QUEUE_ID_DMA_1_1,
 134        [6] = GAUDI_QUEUE_ID_DMA_1_2,
 135        [7] = GAUDI_QUEUE_ID_DMA_1_3,
 136};
 137
 138static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
 139        [PACKET_WREG_32]        = sizeof(struct packet_wreg32),
 140        [PACKET_WREG_BULK]      = sizeof(struct packet_wreg_bulk),
 141        [PACKET_MSG_LONG]       = sizeof(struct packet_msg_long),
 142        [PACKET_MSG_SHORT]      = sizeof(struct packet_msg_short),
 143        [PACKET_CP_DMA]         = sizeof(struct packet_cp_dma),
 144        [PACKET_REPEAT]         = sizeof(struct packet_repeat),
 145        [PACKET_MSG_PROT]       = sizeof(struct packet_msg_prot),
 146        [PACKET_FENCE]          = sizeof(struct packet_fence),
 147        [PACKET_LIN_DMA]        = sizeof(struct packet_lin_dma),
 148        [PACKET_NOP]            = sizeof(struct packet_nop),
 149        [PACKET_STOP]           = sizeof(struct packet_stop),
 150        [PACKET_ARB_POINT]      = sizeof(struct packet_arb_point),
 151        [PACKET_WAIT]           = sizeof(struct packet_wait),
 152        [PACKET_LOAD_AND_EXE]   = sizeof(struct packet_load_and_exe)
 153};
 154
 155static inline bool validate_packet_id(enum packet_id id)
 156{
 157        switch (id) {
 158        case PACKET_WREG_32:
 159        case PACKET_WREG_BULK:
 160        case PACKET_MSG_LONG:
 161        case PACKET_MSG_SHORT:
 162        case PACKET_CP_DMA:
 163        case PACKET_REPEAT:
 164        case PACKET_MSG_PROT:
 165        case PACKET_FENCE:
 166        case PACKET_LIN_DMA:
 167        case PACKET_NOP:
 168        case PACKET_STOP:
 169        case PACKET_ARB_POINT:
 170        case PACKET_WAIT:
 171        case PACKET_LOAD_AND_EXE:
 172                return true;
 173        default:
 174                return false;
 175        }
 176}
 177
 178static const char * const
 179gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
 180        "tpc_address_exceed_slm",
 181        "tpc_div_by_0",
 182        "tpc_spu_mac_overflow",
 183        "tpc_spu_addsub_overflow",
 184        "tpc_spu_abs_overflow",
 185        "tpc_spu_fp_dst_nan_inf",
 186        "tpc_spu_fp_dst_denorm",
 187        "tpc_vpu_mac_overflow",
 188        "tpc_vpu_addsub_overflow",
 189        "tpc_vpu_abs_overflow",
 190        "tpc_vpu_fp_dst_nan_inf",
 191        "tpc_vpu_fp_dst_denorm",
 192        "tpc_assertions",
 193        "tpc_illegal_instruction",
 194        "tpc_pc_wrap_around",
 195        "tpc_qm_sw_err",
 196        "tpc_hbw_rresp_err",
 197        "tpc_hbw_bresp_err",
 198        "tpc_lbw_rresp_err",
 199        "tpc_lbw_bresp_err"
 200};
 201
 202static const char * const
 203gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
 204        "PQ AXI HBW error",
 205        "CQ AXI HBW error",
 206        "CP AXI HBW error",
 207        "CP error due to undefined OPCODE",
 208        "CP encountered STOP OPCODE",
 209        "CP AXI LBW error",
 210        "CP WRREG32 or WRBULK returned error",
 211        "N/A",
 212        "FENCE 0 inc over max value and clipped",
 213        "FENCE 1 inc over max value and clipped",
 214        "FENCE 2 inc over max value and clipped",
 215        "FENCE 3 inc over max value and clipped",
 216        "FENCE 0 dec under min value and clipped",
 217        "FENCE 1 dec under min value and clipped",
 218        "FENCE 2 dec under min value and clipped",
 219        "FENCE 3 dec under min value and clipped"
 220};
 221
 222static const char * const
 223gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
 224        "Choice push while full error",
 225        "Choice Q watchdog error",
 226        "MSG AXI LBW returned with error"
 227};
 228
 229enum gaudi_sm_sei_cause {
 230        GAUDI_SM_SEI_SO_OVERFLOW,
 231        GAUDI_SM_SEI_LBW_4B_UNALIGNED,
 232        GAUDI_SM_SEI_AXI_RESPONSE_ERR
 233};
 234
 235static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
 236        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
 237        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
 238        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
 239        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
 240        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
 241        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
 242        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
 243        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
 244        QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
 245        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
 246        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
 247        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
 248        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
 249        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
 250        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
 251        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
 252        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
 253        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
 254        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
 255        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
 256        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
 257        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
 258        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
 259        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
 260        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
 261        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
 262        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
 263        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
 264        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
 265        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
 266        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
 267        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
 268        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
 269        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
 270        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
 271        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
 272        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
 273        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
 274        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
 275        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
 276        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
 277        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
 278        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
 279        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
 280        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
 281        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
 282        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
 283        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
 284        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
 285        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
 286        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
 287        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
 288        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
 289        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
 290        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
 291        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
 292        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
 293        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
 294        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
 295        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
 296        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
 297        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
 298        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
 299        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
 300        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
 301        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
 302        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
 303        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
 304        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
 305        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
 306        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
 307        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
 308        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
 309        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
 310        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
 311        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
 312        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
 313        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
 314        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
 315        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
 316        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
 317        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
 318        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
 319        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
 320        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
 321        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
 322        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
 323        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
 324        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
 325        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
 326        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
 327        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
 328        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
 329        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
 330        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
 331        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
 332        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
 333        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
 334        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
 335        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
 336        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
 337        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
 338        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
 339        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
 340        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
 341        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
 342        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
 343        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
 344        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
 345        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
 346        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
 347        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
 348        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
 349};
 350
 351struct ecc_info_extract_params {
 352        u64 block_address;
 353        u32 num_memories;
 354        bool derr;
 355        bool disable_clock_gating;
 356};
 357
 358static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
 359                                                                u64 phys_addr);
 360static int gaudi_send_job_on_qman0(struct hl_device *hdev,
 361                                        struct hl_cs_job *job);
 362static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
 363                                        u32 size, u64 val);
 364static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
 365                                        u32 num_regs, u32 val);
 366static int gaudi_schedule_register_memset(struct hl_device *hdev,
 367                u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val);
 368static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
 369                                u32 tpc_id);
 370static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
 371static int gaudi_cpucp_info_get(struct hl_device *hdev);
 372static void gaudi_disable_clock_gating(struct hl_device *hdev);
 373static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
 374static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
 375                                u32 size, bool eb);
 376static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
 377                                struct hl_gen_wait_properties *prop);
 378
 379static inline enum hl_collective_mode
 380get_collective_mode(struct hl_device *hdev, u32 queue_id)
 381{
 382        if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
 383                return HL_COLLECTIVE_MASTER;
 384
 385        if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
 386                        queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
 387                return HL_COLLECTIVE_SLAVE;
 388
 389        if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
 390                        queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
 391                return HL_COLLECTIVE_SLAVE;
 392
 393        if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
 394                        queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
 395                return HL_COLLECTIVE_SLAVE;
 396
 397        return HL_COLLECTIVE_NOT_SUPPORTED;
 398}
 399
 400static inline void set_default_power_values(struct hl_device *hdev)
 401{
 402        struct asic_fixed_properties *prop = &hdev->asic_prop;
 403
 404        if (hdev->card_type == cpucp_card_type_pmc) {
 405                prop->max_power_default = MAX_POWER_DEFAULT_PMC;
 406                prop->dc_power_default = DC_POWER_DEFAULT_PMC;
 407        } else {
 408                prop->max_power_default = MAX_POWER_DEFAULT_PCI;
 409                prop->dc_power_default = DC_POWER_DEFAULT_PCI;
 410        }
 411}
 412
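/*
 * gaudi_set_fixed_properties() - Fill the fixed ASIC properties.
 * @hdev: Pointer to hl_device structure.
 *
 * Set the queue, DRAM, SRAM, MMU and sync-stream properties that do not
 * depend on firmware state.
 *
 * Return: 0 for success, -ENOMEM if the queue properties allocation fails.
 */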
 413static int gaudi_set_fixed_properties(struct hl_device *hdev)
 414{
 415        struct asic_fixed_properties *prop = &hdev->asic_prop;
 416        u32 num_sync_stream_queues = 0;
 417        int i;
 418
 419        prop->max_queues = GAUDI_QUEUE_ID_SIZE;
 420        prop->hw_queues_props = kcalloc(prop->max_queues,
 421                        sizeof(struct hw_queue_properties),
 422                        GFP_KERNEL);
 423
 424        if (!prop->hw_queues_props)
 425                return -ENOMEM;
 426
 427        for (i = 0 ; i < prop->max_queues ; i++) {
 428                if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
 429                        prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
 430                        prop->hw_queues_props[i].driver_only = 0;
 431                        prop->hw_queues_props[i].supports_sync_stream = 1;
 432                        prop->hw_queues_props[i].cb_alloc_flags =
 433                                CB_ALLOC_KERNEL;
 434                        num_sync_stream_queues++;
 435                } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
 436                        prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
 437                        prop->hw_queues_props[i].driver_only = 1;
 438                        prop->hw_queues_props[i].supports_sync_stream = 0;
 439                        prop->hw_queues_props[i].cb_alloc_flags =
 440                                CB_ALLOC_KERNEL;
 441                } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
 442                        prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
 443                        prop->hw_queues_props[i].driver_only = 0;
 444                        prop->hw_queues_props[i].supports_sync_stream = 0;
 445                        prop->hw_queues_props[i].cb_alloc_flags =
 446                                CB_ALLOC_USER;
 447
 448                }
 449                prop->hw_queues_props[i].collective_mode =
 450                                                get_collective_mode(hdev, i);
 451        }
 452
 453        prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
 454        prop->collective_first_sob = 0;
 455        prop->collective_first_mon = 0;
 456
 457        /* 2 SOBs per internal queue stream are reserved for collective */
 458        prop->sync_stream_first_sob =
 459                        ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
 460                        * QMAN_STREAMS * HL_RSVD_SOBS;
 461
  462        /* 1 monitor per internal queue stream is reserved for collective
 463         * 2 monitors per external queue stream are reserved for collective
 464         */
 465        prop->sync_stream_first_mon =
 466                        (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
 467                        (NUMBER_OF_EXT_HW_QUEUES * 2);
 468
 469        prop->dram_base_address = DRAM_PHYS_BASE;
 470        prop->dram_size = GAUDI_HBM_SIZE_32GB;
 471        prop->dram_end_address = prop->dram_base_address +
 472                                        prop->dram_size;
 473        prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
 474
 475        prop->sram_base_address = SRAM_BASE_ADDR;
 476        prop->sram_size = SRAM_SIZE;
 477        prop->sram_end_address = prop->sram_base_address +
 478                                        prop->sram_size;
 479        prop->sram_user_base_address = prop->sram_base_address +
 480                                        SRAM_USER_BASE_OFFSET;
 481
 482        prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
 483        if (hdev->pldm)
 484                prop->mmu_pgt_size = 0x800000; /* 8MB */
 485        else
 486                prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
 487        prop->mmu_pte_size = HL_PTE_SIZE;
 488        prop->mmu_hop_table_size = HOP_TABLE_SIZE;
 489        prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
 490        prop->dram_page_size = PAGE_SIZE_2MB;
 491        prop->dram_supports_virtual_memory = false;
 492
 493        prop->pmmu.hop0_shift = HOP0_SHIFT;
 494        prop->pmmu.hop1_shift = HOP1_SHIFT;
 495        prop->pmmu.hop2_shift = HOP2_SHIFT;
 496        prop->pmmu.hop3_shift = HOP3_SHIFT;
 497        prop->pmmu.hop4_shift = HOP4_SHIFT;
 498        prop->pmmu.hop0_mask = HOP0_MASK;
 499        prop->pmmu.hop1_mask = HOP1_MASK;
 500        prop->pmmu.hop2_mask = HOP2_MASK;
 501        prop->pmmu.hop3_mask = HOP3_MASK;
 502        prop->pmmu.hop4_mask = HOP4_MASK;
 503        prop->pmmu.start_addr = VA_HOST_SPACE_START;
 504        prop->pmmu.end_addr =
 505                        (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
 506        prop->pmmu.page_size = PAGE_SIZE_4KB;
 507        prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
 508
  509        /* PMMU and HPMMU are the same except for the page size */
 510        memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
 511        prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
 512
 513        /* shifts and masks are the same in PMMU and DMMU */
 514        memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
 515        prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
 516        prop->dmmu.end_addr = VA_HOST_SPACE_END;
 517        prop->dmmu.page_size = PAGE_SIZE_2MB;
 518
 519        prop->cfg_size = CFG_SIZE;
 520        prop->max_asid = MAX_ASID;
 521        prop->num_of_events = GAUDI_EVENT_SIZE;
 522        prop->tpc_enabled_mask = TPC_ENABLED_MASK;
 523
 524        set_default_power_values(hdev);
 525
 526        prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
 527        prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
 528
 529        prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
 530        prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
 531
 532        strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
 533                                        CARD_NAME_MAX_LEN);
 534
 535        prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
 536
 537        prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
 538                        prop->sync_stream_first_sob +
 539                        (num_sync_stream_queues * HL_RSVD_SOBS);
 540        prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
 541                        prop->sync_stream_first_mon +
 542                        (num_sync_stream_queues * HL_RSVD_MONS);
 543
 544        prop->first_available_user_msix_interrupt = USHRT_MAX;
 545
 546        for (i = 0 ; i < HL_MAX_DCORES ; i++)
 547                prop->first_available_cq[i] = USHRT_MAX;
 548
 549        prop->fw_cpu_boot_dev_sts0_valid = false;
 550        prop->fw_cpu_boot_dev_sts1_valid = false;
 551        prop->hard_reset_done_by_fw = false;
 552        prop->gic_interrupts_enable = true;
 553
 554        return 0;
 555}
 556
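/*
 * gaudi_pci_bars_map() - Map the SRAM, CFG and HBM PCI BARs.
 * @hdev: Pointer to hl_device structure.
 *
 * Map the three BARs (HBM as write-combined) and set hdev->rmmio to point
 * at the configuration space inside the CFG BAR.
 *
 * Return: 0 for success, negative value for error.
 */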
 557static int gaudi_pci_bars_map(struct hl_device *hdev)
 558{
 559        static const char * const name[] = {"SRAM", "CFG", "HBM"};
 560        bool is_wc[3] = {false, false, true};
 561        int rc;
 562
 563        rc = hl_pci_bars_map(hdev, name, is_wc);
 564        if (rc)
 565                return rc;
 566
 567        hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
 568                        (CFG_BASE - SPI_FLASH_BASE_ADDR);
 569
 570        return 0;
 571}
 572
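/*
 * gaudi_set_hbm_bar_base() - Move the HBM BAR window to a new DRAM address.
 * @hdev: Pointer to hl_device structure.
 * @addr: DRAM address the BAR should point to.
 *
 * Return: the previous BAR address, or U64_MAX if the iATU was configured by
 * the firmware or the inbound region could not be set.
 */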
 573static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
 574{
 575        struct gaudi_device *gaudi = hdev->asic_specific;
 576        struct hl_inbound_pci_region pci_region;
 577        u64 old_addr = addr;
 578        int rc;
 579
 580        if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
 581                return old_addr;
 582
 583        if (hdev->asic_prop.iatu_done_by_fw)
 584                return U64_MAX;
 585
 586        /* Inbound Region 2 - Bar 4 - Point to HBM */
 587        pci_region.mode = PCI_BAR_MATCH_MODE;
 588        pci_region.bar = HBM_BAR_ID;
 589        pci_region.addr = addr;
 590        rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
 591        if (rc)
 592                return U64_MAX;
 593
 594        if (gaudi) {
 595                old_addr = gaudi->hbm_bar_cur_addr;
 596                gaudi->hbm_bar_cur_addr = addr;
 597        }
 598
 599        return old_addr;
 600}
 601
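/*
 * gaudi_init_iatu() - Configure the PCI iATU inbound/outbound regions.
 * @hdev: Pointer to hl_device structure.
 *
 * Inbound region 0 maps BAR 0 to SRAM+CFG, region 1 maps BAR 2 to the SPI
 * flash and region 2 maps BAR 4 to HBM. A single outbound region points to
 * the host. Skipped entirely when the firmware already configured the iATU.
 *
 * Return: 0 for success, negative value for error.
 */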
 602static int gaudi_init_iatu(struct hl_device *hdev)
 603{
 604        struct hl_inbound_pci_region inbound_region;
 605        struct hl_outbound_pci_region outbound_region;
 606        int rc;
 607
 608        if (hdev->asic_prop.iatu_done_by_fw)
 609                return 0;
 610
 611        /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
 612        inbound_region.mode = PCI_BAR_MATCH_MODE;
 613        inbound_region.bar = SRAM_BAR_ID;
 614        inbound_region.addr = SRAM_BASE_ADDR;
 615        rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
 616        if (rc)
 617                goto done;
 618
 619        /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
 620        inbound_region.mode = PCI_BAR_MATCH_MODE;
 621        inbound_region.bar = CFG_BAR_ID;
 622        inbound_region.addr = SPI_FLASH_BASE_ADDR;
 623        rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
 624        if (rc)
 625                goto done;
 626
 627        /* Inbound Region 2 - Bar 4 - Point to HBM */
 628        inbound_region.mode = PCI_BAR_MATCH_MODE;
 629        inbound_region.bar = HBM_BAR_ID;
 630        inbound_region.addr = DRAM_PHYS_BASE;
 631        rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
 632        if (rc)
 633                goto done;
 634
 635        hdev->asic_funcs->set_dma_mask_from_fw(hdev);
 636
 637        /* Outbound Region 0 - Point to Host */
 638        outbound_region.addr = HOST_PHYS_BASE;
 639        outbound_region.size = HOST_PHYS_SIZE;
 640        rc = hl_pci_set_outbound_region(hdev, &outbound_region);
 641
 642done:
 643        return rc;
 644}
 645
 646static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
 647{
 648        return RREG32(mmHW_STATE);
 649}
 650
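/*
 * gaudi_early_init() - Early initialization of the device.
 * @hdev: Pointer to hl_device structure.
 *
 * Set the fixed properties, verify the BAR sizes, check whether the firmware
 * configured the iATU, initialize the PCI layer, read the preboot status and
 * reset the device if its H/W state is dirty.
 *
 * Return: 0 for success, negative value for error.
 */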
 651static int gaudi_early_init(struct hl_device *hdev)
 652{
 653        struct asic_fixed_properties *prop = &hdev->asic_prop;
 654        struct pci_dev *pdev = hdev->pdev;
 655        u32 fw_boot_status;
 656        int rc;
 657
 658        rc = gaudi_set_fixed_properties(hdev);
 659        if (rc) {
 660                dev_err(hdev->dev, "Failed setting fixed properties\n");
 661                return rc;
 662        }
 663
 664        /* Check BAR sizes */
 665        if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
 666                dev_err(hdev->dev,
 667                        "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
 668                        SRAM_BAR_ID,
 669                        (unsigned long long) pci_resource_len(pdev,
 670                                                        SRAM_BAR_ID),
 671                        SRAM_BAR_SIZE);
 672                rc = -ENODEV;
 673                goto free_queue_props;
 674        }
 675
 676        if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
 677                dev_err(hdev->dev,
 678                        "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
 679                        CFG_BAR_ID,
 680                        (unsigned long long) pci_resource_len(pdev,
 681                                                                CFG_BAR_ID),
 682                        CFG_BAR_SIZE);
 683                rc = -ENODEV;
 684                goto free_queue_props;
 685        }
 686
 687        prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
 688
 689        /* If FW security is enabled at this point it means no access to ELBI */
 690        if (hdev->asic_prop.fw_security_enabled) {
 691                hdev->asic_prop.iatu_done_by_fw = true;
 692
 693                /*
  694                 * The GIC security bit can ONLY be set by CPUCP, so at this
  695                 * stage the decision can only be based on PCI ID security.
 696                 */
 697                hdev->asic_prop.gic_interrupts_enable = false;
 698                goto pci_init;
 699        }
 700
 701        rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
 702                                &fw_boot_status);
 703        if (rc)
 704                goto free_queue_props;
 705
 706        /* Check whether FW is configuring iATU */
 707        if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
 708                        (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
 709                hdev->asic_prop.iatu_done_by_fw = true;
 710
 711pci_init:
 712        rc = hl_pci_init(hdev);
 713        if (rc)
 714                goto free_queue_props;
 715
  716        /* Before continuing with the initialization, we need to read the preboot
 717         * version to determine whether we run with a security-enabled firmware
 718         */
 719        rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
 720                                        mmCPU_BOOT_DEV_STS0,
 721                                        mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
 722                                        mmCPU_BOOT_ERR1,
 723                                        GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
 724        if (rc) {
 725                if (hdev->reset_on_preboot_fail)
 726                        hdev->asic_funcs->hw_fini(hdev, true);
 727                goto pci_fini;
 728        }
 729
 730        if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
 731                dev_info(hdev->dev,
 732                        "H/W state is dirty, must reset before initializing\n");
 733                hdev->asic_funcs->hw_fini(hdev, true);
 734        }
 735
 736        return 0;
 737
 738pci_fini:
 739        hl_pci_fini(hdev);
 740free_queue_props:
 741        kfree(hdev->asic_prop.hw_queues_props);
 742        return rc;
 743}
 744
 745static int gaudi_early_fini(struct hl_device *hdev)
 746{
 747        kfree(hdev->asic_prop.hw_queues_props);
 748        hl_pci_fini(hdev);
 749
 750        return 0;
 751}
 752
 753/**
 754 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
 755 *
 756 * @hdev: pointer to hl_device structure
  757 * Return: 0 on success, negative value on failure.
 758 */
 759static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
 760{
 761        struct asic_fixed_properties *prop = &hdev->asic_prop;
 762        u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
 763        u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
 764        int rc;
 765
 766        if (hdev->asic_prop.fw_security_enabled) {
 767                rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
 768
 769                if (rc)
 770                        return rc;
 771
 772                freq = pll_freq_arr[2];
 773        } else {
 774                /* Backward compatibility */
 775                div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
 776                div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
 777                nr = RREG32(mmPSOC_CPU_PLL_NR);
 778                nf = RREG32(mmPSOC_CPU_PLL_NF);
 779                od = RREG32(mmPSOC_CPU_PLL_OD);
 780
 781                if (div_sel == DIV_SEL_REF_CLK ||
 782                                div_sel == DIV_SEL_DIVIDED_REF) {
 783                        if (div_sel == DIV_SEL_REF_CLK)
 784                                freq = PLL_REF_CLK;
 785                        else
 786                                freq = PLL_REF_CLK / (div_fctr + 1);
 787                } else if (div_sel == DIV_SEL_PLL_CLK ||
 788                        div_sel == DIV_SEL_DIVIDED_PLL) {
 789                        pll_clk = PLL_REF_CLK * (nf + 1) /
 790                                        ((nr + 1) * (od + 1));
 791                        if (div_sel == DIV_SEL_PLL_CLK)
 792                                freq = pll_clk;
 793                        else
 794                                freq = pll_clk / (div_fctr + 1);
 795                } else {
 796                        dev_warn(hdev->dev,
 797                                "Received invalid div select value: %d",
 798                                div_sel);
 799                        freq = 0;
 800                }
 801        }
 802
 803        prop->psoc_timestamp_frequency = freq;
 804        prop->psoc_pci_pll_nr = nr;
 805        prop->psoc_pci_pll_nf = nf;
 806        prop->psoc_pci_pll_od = od;
 807        prop->psoc_pci_pll_div_factor = div_fctr;
 808
 809        return 0;
 810}
 811
 812static int _gaudi_init_tpc_mem(struct hl_device *hdev,
 813                dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
 814{
 815        struct asic_fixed_properties *prop = &hdev->asic_prop;
 816        struct packet_lin_dma *init_tpc_mem_pkt;
 817        struct hl_cs_job *job;
 818        struct hl_cb *cb;
 819        u64 dst_addr;
 820        u32 cb_size, ctl;
 821        u8 tpc_id;
 822        int rc;
 823
 824        cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
 825        if (!cb)
 826                return -EFAULT;
 827
 828        init_tpc_mem_pkt = cb->kernel_address;
 829        cb_size = sizeof(*init_tpc_mem_pkt);
 830        memset(init_tpc_mem_pkt, 0, cb_size);
 831
 832        init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
 833
 834        ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
 835        ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
 836        ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
 837        ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
 838
 839        init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
 840
 841        init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
 842        dst_addr = (prop->sram_user_base_address &
 843                        GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
 844                        GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
 845        init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
 846
 847        job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
 848        if (!job) {
 849                dev_err(hdev->dev, "Failed to allocate a new job\n");
 850                rc = -ENOMEM;
 851                goto release_cb;
 852        }
 853
 854        job->id = 0;
 855        job->user_cb = cb;
 856        atomic_inc(&job->user_cb->cs_cnt);
 857        job->user_cb_size = cb_size;
 858        job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
 859        job->patched_cb = job->user_cb;
 860        job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
 861
 862        hl_debugfs_add_job(hdev, job);
 863
 864        rc = gaudi_send_job_on_qman0(hdev, job);
 865
 866        if (rc)
 867                goto free_job;
 868
 869        for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
 870                rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
 871                if (rc)
 872                        break;
 873        }
 874
 875free_job:
 876        hl_userptr_delete_list(hdev, &job->userptr_list);
 877        hl_debugfs_remove_job(hdev, job);
 878        kfree(job);
 879        atomic_dec(&cb->cs_cnt);
 880
 881release_cb:
 882        hl_cb_put(cb);
 883        hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
 884
 885        return rc;
 886}
 887
 888/*
 889 * gaudi_init_tpc_mem() - Initialize TPC memories.
 890 * @hdev: Pointer to hl_device structure.
 891 *
 892 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
 893 *
 894 * Return: 0 for success, negative value for error.
 895 */
 896static int gaudi_init_tpc_mem(struct hl_device *hdev)
 897{
 898        const struct firmware *fw;
 899        size_t fw_size;
 900        void *cpu_addr;
 901        dma_addr_t dma_handle;
 902        int rc, count = 5;
 903
 904again:
 905        rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
 906        if (rc == -EINTR && count-- > 0) {
 907                msleep(50);
 908                goto again;
 909        }
 910
 911        if (rc) {
 912                dev_err(hdev->dev, "Failed to load firmware file %s\n",
 913                                GAUDI_TPC_FW_FILE);
 914                goto out;
 915        }
 916
 917        fw_size = fw->size;
 918        cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
 919                        &dma_handle, GFP_KERNEL | __GFP_ZERO);
 920        if (!cpu_addr) {
 921                dev_err(hdev->dev,
 922                        "Failed to allocate %zu of dma memory for TPC kernel\n",
 923                        fw_size);
 924                rc = -ENOMEM;
 925                goto out;
 926        }
 927
 928        memcpy(cpu_addr, fw->data, fw_size);
 929
 930        rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
 931
 932        hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
 933                        dma_handle);
 934
 935out:
 936        release_firmware(fw);
 937        return rc;
 938}
 939
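/*
 * gaudi_collective_map_sobs() - Assign the current SOB group to a stream.
 * @hdev: Pointer to hl_device structure.
 * @stream: QMAN stream index.
 *
 * Give each NIC slave queue of the stream its own SOB from the group, and
 * map the DMA5 and TPC7 queues to the SOB that follows the NIC SOBs.
 */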
 940static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
 941{
 942        struct gaudi_device *gaudi = hdev->asic_specific;
 943        struct gaudi_collective_properties *prop = &gaudi->collective_props;
 944        struct hl_hw_queue *q;
 945        u32 i, sob_id, sob_group_id, queue_id;
 946
 947        /* Iterate through SOB groups and assign a SOB for each slave queue */
 948        sob_group_id =
 949                stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
 950        sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
 951
 952        queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
 953        for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
 954                q = &hdev->kernel_queues[queue_id + (4 * i)];
 955                q->sync_stream_prop.collective_sob_id = sob_id + i;
 956        }
 957
 958        /* Both DMA5 and TPC7 use the same resources since only a single
  959         * engine needs to participate in the reduction process
 960         */
 961        queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
 962        q = &hdev->kernel_queues[queue_id];
 963        q->sync_stream_prop.collective_sob_id =
 964                        sob_id + NIC_NUMBER_OF_ENGINES;
 965
 966        queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
 967        q = &hdev->kernel_queues[queue_id];
 968        q->sync_stream_prop.collective_sob_id =
 969                        sob_id + NIC_NUMBER_OF_ENGINES;
 970}
 971
 972static void gaudi_sob_group_hw_reset(struct kref *ref)
 973{
 974        struct gaudi_hw_sob_group *hw_sob_group =
 975                container_of(ref, struct gaudi_hw_sob_group, kref);
 976        struct hl_device *hdev = hw_sob_group->hdev;
 977        u64 base_addr;
 978        int rc;
 979
 980        base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
 981                        hw_sob_group->base_sob_id * 4;
 982        rc = gaudi_schedule_register_memset(hdev, hw_sob_group->queue_id,
 983                        base_addr, NUMBER_OF_SOBS_IN_GRP, 0);
 984        if (rc)
 985                dev_err(hdev->dev,
 986                        "failed resetting sob group - sob base %u, count %u",
 987                        hw_sob_group->base_sob_id, NUMBER_OF_SOBS_IN_GRP);
 988
 989        kref_init(&hw_sob_group->kref);
 990}
 991
 992static void gaudi_sob_group_reset_error(struct kref *ref)
 993{
 994        struct gaudi_hw_sob_group *hw_sob_group =
 995                container_of(ref, struct gaudi_hw_sob_group, kref);
 996        struct hl_device *hdev = hw_sob_group->hdev;
 997
 998        dev_crit(hdev->dev,
 999                "SOB release shouldn't be called here, base_sob_id: %d\n",
1000                hw_sob_group->base_sob_id);
1001}
1002
1003static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1004{
1005        struct gaudi_collective_properties *prop;
1006        int i;
1007
1008        prop = &gaudi->collective_props;
1009
1010        memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1011
1012        for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1013                if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1014                        prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1015                                        BIT(i % HL_MAX_SOBS_PER_MONITOR);
1016        /* Set collective engine bit */
1017        prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1018                                BIT(i % HL_MAX_SOBS_PER_MONITOR);
1019}
1020
1021static int gaudi_collective_init(struct hl_device *hdev)
1022{
1023        u32 i, sob_id, reserved_sobs_per_group;
1024        struct gaudi_collective_properties *prop;
1025        struct gaudi_device *gaudi;
1026
1027        gaudi = hdev->asic_specific;
1028        prop = &gaudi->collective_props;
1029        sob_id = hdev->asic_prop.collective_first_sob;
1030
1031        /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1032        reserved_sobs_per_group =
1033                ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1034
1035        /* Init SOB groups */
1036        for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1037                prop->hw_sob_group[i].hdev = hdev;
1038                prop->hw_sob_group[i].base_sob_id = sob_id;
1039                sob_id += reserved_sobs_per_group;
1040                gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1041        }
1042
1043        for (i = 0 ; i < QMAN_STREAMS; i++) {
1044                prop->next_sob_group_val[i] = 1;
1045                prop->curr_sob_group_idx[i] = 0;
1046                gaudi_collective_map_sobs(hdev, i);
1047        }
1048
1049        gaudi_collective_mstr_sob_mask_set(gaudi);
1050
1051        return 0;
1052}
1053
1054static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1055{
1056        struct gaudi_device *gaudi = hdev->asic_specific;
1057        struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1058
1059        kref_put(&cprop->hw_sob_group[sob_group].kref,
1060                                        gaudi_sob_group_hw_reset);
1061}
1062
1063static void gaudi_collective_master_init_job(struct hl_device *hdev,
1064                struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1065{
1066        u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1067        struct gaudi_collective_properties *cprop;
1068        struct hl_gen_wait_properties wait_prop;
1069        struct hl_sync_stream_properties *prop;
1070        struct gaudi_device *gaudi;
1071
1072        gaudi = hdev->asic_specific;
1073        cprop = &gaudi->collective_props;
1074        queue_id = job->hw_queue_id;
1075        prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1076
1077        master_sob_base =
1078                cprop->hw_sob_group[sob_group_offset].base_sob_id;
1079        master_monitor = prop->collective_mstr_mon_id[0];
1080
1081        cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1082
1083        dev_dbg(hdev->dev,
1084                "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1085                master_sob_base, cprop->mstr_sob_mask[0],
1086                cprop->next_sob_group_val[stream],
1087                master_monitor, queue_id);
1088
1089        wait_prop.data = (void *) job->patched_cb;
1090        wait_prop.sob_base = master_sob_base;
1091        wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1092        wait_prop.sob_val = cprop->next_sob_group_val[stream];
1093        wait_prop.mon_id = master_monitor;
1094        wait_prop.q_idx = queue_id;
1095        wait_prop.size = cb_size;
1096        cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1097
1098        master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1099        master_monitor = prop->collective_mstr_mon_id[1];
1100
1101        dev_dbg(hdev->dev,
1102                "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1103                master_sob_base, cprop->mstr_sob_mask[1],
1104                cprop->next_sob_group_val[stream],
1105                master_monitor, queue_id);
1106
1107        wait_prop.sob_base = master_sob_base;
1108        wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1109        wait_prop.mon_id = master_monitor;
1110        wait_prop.size = cb_size;
1111        cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1112}
1113
1114static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1115                struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1116{
1117        struct hl_gen_wait_properties wait_prop;
1118        struct hl_sync_stream_properties *prop;
1119        u32 queue_id, cb_size = 0;
1120
1121        queue_id = job->hw_queue_id;
1122        prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1123
1124        /* Add to wait CBs using slave monitor */
1125        wait_prop.data = (void *) job->user_cb;
1126        wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1127        wait_prop.sob_mask = 0x1;
1128        wait_prop.sob_val = cs_cmpl->sob_val;
1129        wait_prop.mon_id = prop->collective_slave_mon_id;
1130        wait_prop.q_idx = queue_id;
1131        wait_prop.size = cb_size;
1132
1133        dev_dbg(hdev->dev,
1134                "Generate slave wait CB, sob %d, val:0x%x, mon %d, q %d\n",
1135                cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1136                prop->collective_slave_mon_id, queue_id);
1137
1138        cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1139
1140        dev_dbg(hdev->dev,
1141                "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1142                prop->collective_sob_id, queue_id);
1143
1144        cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1145                        prop->collective_sob_id, cb_size, false);
1146}
1147
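/*
 * gaudi_collective_wait_init_cs() - Initialize a collective wait CS.
 * @cs: Pointer to the command submission structure.
 *
 * Copy the signal SOB into the wait CS, build the master/slave wait CBs for
 * every job in the CS and advance the SOB group value, switching to the next
 * reserved group on wraparound.
 */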
1148static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
1149{
1150        struct hl_cs_compl *signal_cs_cmpl =
1151                container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1152        struct hl_cs_compl *cs_cmpl =
1153                container_of(cs->fence, struct hl_cs_compl, base_fence);
1154        struct gaudi_collective_properties *cprop;
1155        u32 stream, queue_id, sob_group_offset;
1156        struct gaudi_device *gaudi;
1157        struct hl_device *hdev;
1158        struct hl_cs_job *job;
1159        struct hl_ctx *ctx;
1160
1161        ctx = cs->ctx;
1162        hdev = ctx->hdev;
1163        gaudi = hdev->asic_specific;
1164        cprop = &gaudi->collective_props;
1165
1166        /* copy the SOB id and value of the signal CS */
1167        cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1168        cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1169
1170        /* Calculate the stream from collective master queue (1st job) */
1171        job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1172        stream = job->hw_queue_id % 4;
1173        sob_group_offset =
1174                stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1175
1176        list_for_each_entry(job, &cs->job_list, cs_node) {
1177                queue_id = job->hw_queue_id;
1178
1179                if (hdev->kernel_queues[queue_id].collective_mode ==
1180                                HL_COLLECTIVE_MASTER)
1181                        gaudi_collective_master_init_job(hdev, job, stream,
1182                                                sob_group_offset);
1183                else
1184                        gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1185        }
1186
1187        cs_cmpl->sob_group = sob_group_offset;
1188
1189        /* Handle sob group kref and wraparound */
1190        kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1191        cprop->next_sob_group_val[stream]++;
1192
1193        if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1194                /*
1195                 * Decrement as we reached the max value.
1196                 * The release function won't be called here as we've
1197                 * just incremented the refcount.
1198                 */
1199                kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1200                                gaudi_sob_group_reset_error);
1201                cprop->next_sob_group_val[stream] = 1;
1202                /* only two SOBs are currently in use */
1203                cprop->curr_sob_group_idx[stream] =
1204                        (cprop->curr_sob_group_idx[stream] + 1) &
1205                                                        (HL_RSVD_SOBS - 1);
1206
1207                gaudi_collective_map_sobs(hdev, stream);
1208
1209                dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1210                                cprop->curr_sob_group_idx[stream], stream);
1211        }
1212
1213        /* Increment kref since all slave queues are now waiting on it */
1214        kref_get(&cs_cmpl->hw_sob->kref);
1215        /*
1216         * Must put the signal fence after the SOB refcnt increment so
1217         * the SOB refcnt won't turn 0 and reset the SOB before the
1218         * wait CS was submitted.
1219         */
1220        mb();
1221        hl_fence_put(cs->signal_fence);
1222        cs->signal_fence = NULL;
1223}
1224
1225static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1226                struct hl_ctx *ctx, struct hl_cs *cs,
1227                enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id)
1228{
1229        struct hw_queue_properties *hw_queue_prop;
1230        struct hl_cs_counters_atomic *cntr;
1231        struct hl_cs_job *job;
1232        struct hl_cb *cb;
1233        u32 cb_size;
1234        bool patched_cb;
1235
1236        cntr = &hdev->aggregated_cs_counters;
1237
1238        if (mode == HL_COLLECTIVE_MASTER) {
1239                /* CB size of collective master queue contains
1240                 * 4 msg short packets for monitor 1 configuration
1241                 * 1 fence packet
1242                 * 4 msg short packets for monitor 2 configuration
1243                 * 1 fence packet
1244                 * 2 msg prot packets for completion and MSI-X
1245                 */
1246                cb_size = sizeof(struct packet_msg_short) * 8 +
1247                                sizeof(struct packet_fence) * 2 +
1248                                sizeof(struct packet_msg_prot) * 2;
1249                patched_cb = true;
1250        } else {
1251                /* CB size of collective slave queues contains
1252                 * 4 msg short packets for monitor configuration
1253                 * 1 fence packet
1254                 * 1 additional msg short packet for sob signal
1255                 */
1256                cb_size = sizeof(struct packet_msg_short) * 5 +
1257                                sizeof(struct packet_fence);
1258                patched_cb = false;
1259        }
1260
1261        hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1262        job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1263        if (!job) {
1264                atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1265                atomic64_inc(&cntr->out_of_mem_drop_cnt);
1266                dev_err(hdev->dev, "Failed to allocate a new job\n");
1267                return -ENOMEM;
1268        }
1269
1270        /* Allocate internal mapped CB for non patched CBs */
1271        cb = hl_cb_kernel_create(hdev, cb_size,
1272                        hdev->mmu_enable && !patched_cb);
1273        if (!cb) {
1274                atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1275                atomic64_inc(&cntr->out_of_mem_drop_cnt);
1276                kfree(job);
1277                return -EFAULT;
1278        }
1279
1280        job->id = 0;
1281        job->cs = cs;
1282        job->user_cb = cb;
1283        atomic_inc(&job->user_cb->cs_cnt);
1284        job->user_cb_size = cb_size;
1285        job->hw_queue_id = queue_id;
1286
1287        /*
 1288         * No need for parsing, the user CB is the patched CB.
 1289         * We call hl_cb_destroy() for two reasons - we don't need
1290         * the CB in the CB idr anymore and to decrement its refcount as
1291         * it was incremented inside hl_cb_kernel_create().
1292         */
1293        if (patched_cb)
1294                job->patched_cb = job->user_cb;
1295        else
1296                job->patched_cb = NULL;
1297
1298        job->job_cb_size = job->user_cb_size;
1299        hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1300
 1301        /* increment refcount since we get a completion for external queues */
1302        if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1303                cs_get(cs);
1304
1305        cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1306
1307        list_add_tail(&job->cs_node, &cs->job_list);
1308
1309        hl_debugfs_add_job(hdev, job);
1310
1311        return 0;
1312}
1313
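/*
 * gaudi_collective_wait_create_jobs() - Create the jobs of a collective wait CS.
 * @hdev: Pointer to hl_device structure.
 * @ctx: Pointer to the context.
 * @cs: Pointer to the command submission structure.
 * @wait_queue_id: Queue that must be configured as collective master.
 * @collective_engine_id: DMA5 or TPC7, the engine that performs the reduction.
 *
 * The first job is queued on the master queue; the remaining jobs go to the
 * enabled NIC queues and to the reduction engine queue of the same stream.
 *
 * Return: 0 for success, negative value for error.
 */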
1314static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1315                struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
1316                u32 collective_engine_id)
1317{
1318        struct gaudi_device *gaudi = hdev->asic_specific;
1319        struct hw_queue_properties *hw_queue_prop;
1320        u32 queue_id, collective_queue, num_jobs;
1321        u32 stream, nic_queue, nic_idx = 0;
1322        bool skip;
1323        int i, rc = 0;
1324
1325        /* Verify wait queue id is configured as master */
1326        hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1327        if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1328                dev_err(hdev->dev,
1329                        "Queue %d is not configured as collective master\n",
1330                        wait_queue_id);
1331                return -EINVAL;
1332        }
1333
1334        /* Verify engine id is supported */
1335        if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1336                        collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1337                dev_err(hdev->dev,
1338                        "Collective wait does not support engine %u\n",
1339                        collective_engine_id);
1340                return -EINVAL;
1341        }
1342
1343        stream = wait_queue_id % 4;
1344
1345        if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1346                collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1347        else
1348                collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1349
1350        num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1351        nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1352
1353        /* The first job goes to the collective master queue; it will wait
1354         * for the collective slave queues to finish execution.
1355         * The synchronization is done using two monitors:
1356         * the first monitor covers NICs 0-7, the second covers NICs 8-9
1357         * and the reduction engine (DMA5/TPC7).
1358         *
1359         * The rest of the jobs go to the collective slave queues, which
1360         * will all wait for the user to signal sob 'cs_cmpl->sob_val'.
1361         */
1362        for (i = 0 ; i < num_jobs ; i++) {
1363                if (i == 0) {
1364                        queue_id = wait_queue_id;
1365                        rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1366                                HL_COLLECTIVE_MASTER, queue_id, wait_queue_id);
1367                } else {
1368                        if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1369                                if (gaudi->hw_cap_initialized &
1370                                        BIT(HW_CAP_NIC_SHIFT + nic_idx))
1371                                        skip = false;
1372                                else
1373                                        skip = true;
1374
1375                                queue_id = nic_queue;
1376                                nic_queue += 4;
1377                                nic_idx++;
1378
1379                                if (skip)
1380                                        continue;
1381                        } else {
1382                                queue_id = collective_queue;
1383                        }
1384
1385                        rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1386                                HL_COLLECTIVE_SLAVE, queue_id, wait_queue_id);
1387                }
1388
1389                if (rc)
1390                        return rc;
1391        }
1392
1393        return rc;
1394}
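
/*
 * Illustration only (not driver code): the loop above creates at most
 * NUMBER_OF_SOBS_IN_GRP + 1 jobs - one master job on wait_queue_id, one slave
 * job per enabled NIC queue on the matching stream (stepping the queue id by
 * 4), and a final slave job on the DMA5/TPC7 reduction queue. A hypothetical
 * caller would look roughly like this (wait_queue_id must be a queue whose
 * collective_mode is HL_COLLECTIVE_MASTER):
 *
 *	rc = gaudi_collective_wait_create_jobs(hdev, ctx, cs,
 *			wait_queue_id, GAUDI_ENGINE_ID_DMA_5);
 *	if (rc)
 *		return rc;
 */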
1395
1396static int gaudi_late_init(struct hl_device *hdev)
1397{
1398        struct gaudi_device *gaudi = hdev->asic_specific;
1399        int rc;
1400
1401        rc = gaudi->cpucp_info_get(hdev);
1402        if (rc) {
1403                dev_err(hdev->dev, "Failed to get cpucp info\n");
1404                return rc;
1405        }
1406
1407        if ((hdev->card_type == cpucp_card_type_pci) &&
1408                        (hdev->nic_ports_mask & 0x3)) {
1409                dev_info(hdev->dev,
1410                        "PCI card detected, only 8 ports are enabled\n");
1411                hdev->nic_ports_mask &= ~0x3;
1412
1413                /* Stop and disable unused NIC QMANs */
1414                WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1415                                        NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1416                                        NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1417
1418                WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1419                                        NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1420                                        NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1421
1422                WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1423                WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1424
1425                gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1426        }
1427
1428        rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
1429        if (rc) {
1430                dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1431                return rc;
1432        }
1433
1434        rc = gaudi_fetch_psoc_frequency(hdev);
1435        if (rc) {
1436                dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1437                goto disable_pci_access;
1438        }
1439
1440        rc = gaudi_mmu_clear_pgt_range(hdev);
1441        if (rc) {
1442                dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1443                goto disable_pci_access;
1444        }
1445
1446        rc = gaudi_init_tpc_mem(hdev);
1447        if (rc) {
1448                dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1449                goto disable_pci_access;
1450        }
1451
1452        rc = gaudi_collective_init(hdev);
1453        if (rc) {
1454                dev_err(hdev->dev, "Failed to init collective\n");
1455                goto disable_pci_access;
1456        }
1457
1458        return 0;
1459
1460disable_pci_access:
1461        hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
1462
1463        return rc;
1464}
1465
1466static void gaudi_late_fini(struct hl_device *hdev)
1467{
1468        const struct hwmon_channel_info **channel_info_arr;
1469        int i = 0;
1470
1471        if (!hdev->hl_chip_info->info)
1472                return;
1473
1474        channel_info_arr = hdev->hl_chip_info->info;
1475
1476        while (channel_info_arr[i]) {
1477                kfree(channel_info_arr[i]->config);
1478                kfree(channel_info_arr[i]);
1479                i++;
1480        }
1481
1482        kfree(channel_info_arr);
1483
1484        hdev->hl_chip_info->info = NULL;
1485}
1486
1487static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1488{
1489        dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1490        void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1491        int i, j, rc = 0;
1492
1493        /*
1494         * The device CPU works with 40-bit addresses, and bit 39 must be set
1495         * to '1' when accessing the host.
1496         * Bits 49:39 of the full host address are saved for a later
1497         * configuration of the HW that extends the address to 50 bits.
1498         * Because a single HW register holds the extension bits, these bits
1499         * must be identical across the entire allocated range.
1500         */
1501
1502        for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1503                virt_addr_arr[i] =
1504                        hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
1505                                                HL_CPU_ACCESSIBLE_MEM_SIZE,
1506                                                &dma_addr_arr[i],
1507                                                GFP_KERNEL | __GFP_ZERO);
1508                if (!virt_addr_arr[i]) {
1509                        rc = -ENOMEM;
1510                        goto free_dma_mem_arr;
1511                }
1512
1513                end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1514                if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1515                                GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1516                        break;
1517        }
1518
1519        if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1520                dev_err(hdev->dev,
1521                        "MSBs of CPU accessible DMA memory are not identical across the allocated range\n");
1522                rc = -EFAULT;
1523                goto free_dma_mem_arr;
1524        }
1525
1526        hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1527        hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1528        hdev->cpu_pci_msb_addr =
1529                GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1530
1531        if (!hdev->asic_prop.fw_security_enabled)
1532                GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1533
1534free_dma_mem_arr:
1535        for (j = 0 ; j < i ; j++)
1536                hdev->asic_funcs->asic_dma_free_coherent(hdev,
1537                                                HL_CPU_ACCESSIBLE_MEM_SIZE,
1538                                                virt_addr_arr[j],
1539                                                dma_addr_arr[j]);
1540
1541        return rc;
1542}
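
/*
 * Illustration only (not driver code): per the comment above, the extension
 * window is bits 49:39, i.e. 512GB aligned. The retry loop rejects any buffer
 * that straddles such a boundary, for example (made-up numbers):
 *
 *	start = 0x7FFFFE0000                 -> bits 49:39 are 0x0
 *	end   = start + 0x40000 - 1
 *	      = 0x800001FFFF                 -> bits 49:39 are 0x1
 *
 * Here GAUDI_CPU_PCI_MSB_ADDR(start) != GAUDI_CPU_PCI_MSB_ADDR(end), so the
 * buffer is set aside and another attempt is made; all rejected attempts are
 * released at the free_dma_mem_arr label.
 */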
1543
1544static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1545{
1546        struct gaudi_device *gaudi = hdev->asic_specific;
1547        struct gaudi_internal_qman_info *q;
1548        u32 i;
1549
1550        for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1551                q = &gaudi->internal_qmans[i];
1552                if (!q->pq_kernel_addr)
1553                        continue;
1554                hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
1555                                                        q->pq_kernel_addr,
1556                                                        q->pq_dma_addr);
1557        }
1558}
1559
1560static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1561{
1562        struct gaudi_device *gaudi = hdev->asic_specific;
1563        struct gaudi_internal_qman_info *q;
1564        int rc, i;
1565
1566        for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1567                if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1568                        continue;
1569
1570                q = &gaudi->internal_qmans[i];
1571
1572                switch (i) {
1573                case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1574                        q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1575                        break;
1576                case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1577                        q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1578                        break;
1579                case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1580                        q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1581                        break;
1582                case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1583                        q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1584                        break;
1585                default:
1586                        dev_err(hdev->dev, "Bad internal queue index %d\n", i);
1587                        rc = -EINVAL;
1588                        goto free_internal_qmans_pq_mem;
1589                }
1590
1591                q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
1592                                                hdev, q->pq_size,
1593                                                &q->pq_dma_addr,
1594                                                GFP_KERNEL | __GFP_ZERO);
1595                if (!q->pq_kernel_addr) {
1596                        rc = -ENOMEM;
1597                        goto free_internal_qmans_pq_mem;
1598                }
1599        }
1600
1601        return 0;
1602
1603free_internal_qmans_pq_mem:
1604        gaudi_free_internal_qmans_pq_mem(hdev);
1605        return rc;
1606}
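
/*
 * Illustration only: the switch above sizes each internal queue's PQ by engine
 * class, using constants defined elsewhere in the driver:
 *
 *	GAUDI_QUEUE_ID_DMA_2_0 .. DMA_7_3  ->  HBM_DMA_QMAN_SIZE_IN_BYTES
 *	GAUDI_QUEUE_ID_MME_0_0 .. MME_1_3  ->  MME_QMAN_SIZE_IN_BYTES
 *	GAUDI_QUEUE_ID_TPC_0_0 .. TPC_7_3  ->  TPC_QMAN_SIZE_IN_BYTES
 *	GAUDI_QUEUE_ID_NIC_0_0 .. NIC_9_3  ->  NIC_QMAN_SIZE_IN_BYTES
 *
 * Each PQ is allocated coherently in host memory and presumably handed to the
 * corresponding QMAN later in the H/W init flow.
 */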
1607
1608static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1609{
1610        struct asic_fixed_properties *prop = &hdev->asic_prop;
1611        struct pci_mem_region *region;
1612
1613        /* CFG */
1614        region = &hdev->pci_mem_region[PCI_REGION_CFG];
1615        region->region_base = CFG_BASE;
1616        region->region_size = CFG_SIZE;
1617        region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1618        region->bar_size = CFG_BAR_SIZE;
1619        region->bar_id = CFG_BAR_ID;
1620        region->used = 1;
1621
1622        /* SRAM */
1623        region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1624        region->region_base = SRAM_BASE_ADDR;
1625        region->region_size = SRAM_SIZE;
1626        region->offset_in_bar = 0;
1627        region->bar_size = SRAM_BAR_SIZE;
1628        region->bar_id = SRAM_BAR_ID;
1629        region->used = 1;
1630
1631        /* DRAM */
1632        region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1633        region->region_base = DRAM_PHYS_BASE;
1634        region->region_size = hdev->asic_prop.dram_size;
1635        region->offset_in_bar = 0;
1636        region->bar_size = prop->dram_pci_bar_size;
1637        region->bar_id = HBM_BAR_ID;
1638        region->used = 1;
1639
1640        /* SP SRAM */
1641        region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1642        region->region_base = PSOC_SCRATCHPAD_ADDR;
1643        region->region_size = PSOC_SCRATCHPAD_SIZE;
1644        region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1645        region->bar_size = CFG_BAR_SIZE;
1646        region->bar_id = CFG_BAR_ID;
1647        region->used = 1;
1648}
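
/*
 * Illustration only (not driver code): a region descriptor presumably lets
 * common code translate a device address into a BAR offset. A minimal sketch
 * of that translation, assuming an address that falls inside the region:
 *
 *	u64 bar_off = addr - region->region_base + region->offset_in_bar;
 *
 * For example, in the CFG region an access to CFG_BASE maps to offset
 * CFG_BASE - SPI_FLASH_BASE_ADDR inside the CFG BAR.
 */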
1649
1650static int gaudi_sw_init(struct hl_device *hdev)
1651{
1652        struct gaudi_device *gaudi;
1653        u32 i, event_id = 0;
1654        int rc;
1655
1656        /* Allocate device structure */
1657        gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1658        if (!gaudi)
1659                return -ENOMEM;
1660
1661        for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1662                if (gaudi_irq_map_table[i].valid) {
1663                        if (event_id == GAUDI_EVENT_SIZE) {
1664                                dev_err(hdev->dev,
1665                                        "Event array exceeds the limit of %u events\n",
1666                                        GAUDI_EVENT_SIZE);
1667                                rc = -EINVAL;
1668                                goto free_gaudi_device;
1669                        }
1670
1671                        gaudi->events[event_id++] =
1672                                        gaudi_irq_map_table[i].fc_id;
1673                }
1674        }
1675
1676        gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1677
1678        gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
1679
1680        hdev->asic_specific = gaudi;
1681
1682        /* Create DMA pool for small allocations */
1683        hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1684                        &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1685        if (!hdev->dma_pool) {
1686                dev_err(hdev->dev, "failed to create DMA pool\n");
1687                rc = -ENOMEM;
1688                goto free_gaudi_device;
1689        }
1690
1691        rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1692        if (rc)
1693                goto free_dma_pool;
1694
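        /*
         * The CPU accessible memory is handed out through a genpool with a
         * 32-byte minimum allocation granularity (ilog2(32)) and no NUMA node
         * affinity (-1).
         */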
1695        hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1696        if (!hdev->cpu_accessible_dma_pool) {
1697                dev_err(hdev->dev,
1698                        "Failed to create CPU accessible DMA pool\n");
1699                rc = -ENOMEM;
1700                goto free_cpu_dma_mem;
1701        }
1702
1703        rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1704                                (uintptr_t) hdev->cpu_accessible_dma_mem,
1705                                HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1706        if (rc) {
1707                dev_err(hdev->dev,
1708                        "Failed to add memory to CPU accessible DMA pool\n");
1709                rc = -EFAULT;
1710                goto free_cpu_accessible_dma_pool;
1711        }
1712
1713        rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1714        if (rc)
1715                goto free_cpu_accessible_dma_pool;
1716
1717        spin_lock_init(&gaudi->hw_queues_lock);
1718        mutex_init(&gaudi->clk_gate_mutex);
1719
1720        hdev->supports_sync_stream = true;
1721        hdev->supports_coresight = true;
1722        hdev->supports_staged_submission = true;
1723
1724        gaudi_set_pci_memory_regions(hdev);
1725
1726        return 0;
1727
1728free_cpu_accessible_dma_pool:
1729        gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1730free_cpu_dma_mem:
1731        if (!hdev->asic_prop.fw_security_enabled)
1732                GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1733                                        hdev->cpu_pci_msb_addr);
1734        hdev->asic_funcs->asic_dma_free_coherent(hdev,
1735                        HL_CPU_ACCESSIBLE_MEM_SIZE,
1736                        hdev->cpu_accessible_dma_mem,
1737                        hdev->cpu_accessible_dma_address);
1738free_dma_pool:
1739        dma_pool_destroy(hdev->dma_pool);
1740free_gaudi_device:
1741        kfree(gaudi);
1742        return rc;
1743}
1744
1745static int gaudi_sw_fini(struct hl_device *hdev)
1746{
1747        struct gaudi_device *gaudi = hdev->asic_specific;
1748
1749        gaudi_free_internal_qmans_pq_mem(hdev);
1750
1751        gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1752
1753        if (!hdev->asic_prop.fw_security_enabled)
1754                GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1755                                        hdev->cpu_pci_msb_addr);
1756
1757        hdev->asic_funcs->asic_dma_free_coherent(hdev,
1758                        HL_CPU_ACCESSIBLE_MEM_SIZE,
1759                        hdev->cpu_accessible_dma_mem,
1760                        hdev->cpu_accessible_dma_address);
1761
1762        dma_pool_destroy(hdev->dma_pool);
1763
1764        mutex_destroy(&gaudi->clk_gate_mutex);
1765
1766        kfree(gaudi);
1767
1768        return 0;
1769}
1770
1771static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1772{
1773        struct hl_device *hdev = arg;
1774        int i;
1775
1776        if (hdev->disabled)
1777                return IRQ_HANDLED;
1778
1779        for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1780                hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1781
1782        hl_irq_handler_eq(irq, &hdev->event_queue);
1783
1784        return IRQ_HANDLED;
1785}
1786
1787/*
1788 * For backward compatibility, new MSI interrupts should be set after the
1789 * existing CPU and NIC interrupts.
1790 */
1791static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1792                                bool cpu_eq)
1793{
1794        int msi_vec;
1795
1796        if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1797                dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1798                                GAUDI_EVENT_QUEUE_MSI_IDX);
1799
1800        msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1801                        (nr + NIC_NUMBER_OF_ENGINES + 1);
1802
1803        return pci_irq_vector(hdev->pdev, msi_vec);
1804}
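
/*
 * Illustration only: the mapping implemented above is
 *
 *	nr < GAUDI_EVENT_QUEUE_MSI_IDX  ->  MSI vector nr (completion queues)
 *	cpu_eq                          ->  MSI vector nr (CPU event queue,
 *	                                    expected to be the MSI_IDX value)
 *	otherwise                       ->  MSI vector nr + NIC_NUMBER_OF_ENGINES + 1
 *
 * so newer interrupt sources are appended after the existing CPU and NIC
 * vectors, as the comment above explains.
 */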
1805
1806static int gaudi_enable_msi_single(struct hl_device *hdev)
1807{
1808        int rc, irq;
1809
1810        dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
1811
1812        irq = gaudi_pci_irq_vector(hdev, 0, false);
1813        rc = request_irq(irq, gaudi_irq_handler_single, 0,
1814                        "gaudi single msi", hdev);
1815        if (rc)
1816                dev_err(hdev->dev,
1817                        "Failed to request single MSI IRQ\n");
1818
1819        return rc;
1820}
1821
1822static int gaudi_enable_msi_multi(struct hl_device *hdev)
1823{
1824        int cq_cnt = hdev->asic_prop.completion_queues_count;
1825        int rc, i, irq_cnt_init, irq;
1826
1827        for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1828                irq = gaudi_pci_irq_vector(hdev, i, false);
1829                rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1830                                &hdev->completion_queue[i]);
1831                if (rc) {
1832                        dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
1833                        goto free_irqs;
1834                }
1835        }
1836
1837        irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1838        rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1839                                &hdev->event_queue);
1840        if (rc) {
1841                dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
1842                goto free_irqs;
1843        }
1844
1845        return 0;
1846
1847free_irqs:
1848        for (i = 0 ; i < irq_cnt_init ; i++)
1849                free_irq(gaudi_pci_irq_vector(hdev, i, false),
1850                                &hdev->completion_queue[i]);
1851        return rc;
1852}
1853
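/*
 * Note: pci_alloc_irq_vectors() is called below with both the minimum and the
 * maximum vector count set to 1, so on success it returns 1. Assuming
 * NUMBER_OF_INTERRUPTS is larger than 1, the single-MSI path is the one
 * effectively taken and the multi-MSI path below is not reached with this
 * allocation.
 */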
1854static int gaudi_enable_msi(struct hl_device *hdev)
1855{
1856        struct gaudi_device *gaudi = hdev->asic_specific;
1857        int rc;
1858
1859        if (gaudi->hw_cap_initialized & HW_CAP_MSI)
1860                return 0;
1861
1862        rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
1863        if (rc < 0) {
1864                dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
1865                return rc;
1866        }
1867
1868        if (rc < NUMBER_OF_INTERRUPTS) {
1869                gaudi->multi_msi_mode = false;
1870                rc = gaudi_enable_msi_single(hdev);
1871        } else {
1872                gaudi->multi_msi_mode = true;
1873                rc = gaudi_enable_msi_multi(hdev);
1874        }
1875
1876        if (rc)
1877                goto free_pci_irq_vectors;
1878
1879        gaudi->hw_cap_initialized |= HW_CAP_MSI;
1880
1881        return 0;
1882
1883free_pci_irq_vectors:
1884        pci_free_irq_vectors(hdev->pdev);
1885        return rc;
1886}
1887
1888static void gaudi_sync_irqs(struct hl_device *hdev)
1889{
1890        struct gaudi_device *gaudi = hdev->asic_specific;
1891        int i, cq_cnt = hdev->asic_prop.completion_queues_count;
1892
1893        if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1894                return;
1895
1896        /* Wait for all in-flight IRQ handlers to finish */
1897        if (gaudi->multi_msi_mode) {
1898                for (i = 0 ; i < cq_cnt ; i++)
1899                        synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
1900
1901                synchronize_irq(gaudi_pci_irq_vector(hdev,
1902                                                GAUDI_EVENT_QUEUE_MSI_IDX,
1903                                                true));
1904        } else {
1905                synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
1906        }
1907}
1908
1909static void gaudi_disable_msi(struct hl_device *hdev)
1910{
1911        struct gaudi_device *gaudi = hdev->asic_specific;
1912        int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
1913
1914        if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1915                return;
1916
1917        gaudi_sync_irqs(hdev);
1918
1919        if (gaudi->multi_msi_mode) {
1920                irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
1921                                                true);
1922                free_irq(irq, &hdev->event_queue);
1923
1924                for (i = 0 ; i < cq_cnt ; i++) {
1925                        irq = gaudi_pci_irq_vector(hdev, i, false);
1926                        free_irq(irq, &hdev->completion_queue[i]);
1927                }
1928        } else {
1929                free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
1930        }
1931
1932        pci_free_irq_vectors(hdev->pdev);
1933
1934        gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
1935}
1936
1937static void gaudi_init_scrambler_sram(struct hl_device *hdev)
1938{
1939        struct gaudi_device *gaudi = hdev->asic_specific;
1940
1941        if (hdev->asic_prop.fw_security_enabled)
1942                return;
1943
1944        if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
1945                                                CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
1946                return;
1947
1948        if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
1949                return;
1950
1951        if (!hdev->sram_scrambler_enable)
1952                return;
1953
1954        WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1955                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1956        WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1957                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1958        WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1959                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1960        WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1961                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1962        WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1963                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1964        WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1965                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1966        WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1967                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1968        WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1969                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1970
1971        WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1972                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1973        WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1974                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1975        WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1976                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1977        WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1978                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1979        WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1980                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1981        WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1982                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1983        WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1984                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1985        WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1986                        1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1987
1988        WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
1989                        1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1990        WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
1991                        1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1992        WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
1993                        1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1994        WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
1995                        1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1996        WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
1997                        1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1998        WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
1999                        1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2000        WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2001                        1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2002        WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2003                        1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2004
2005        gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2006}
2007
2008static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2009{
2010        struct gaudi_device *gaudi = hdev->asic_specific;
2011
2012        if (hdev->asic_prop.fw_security_enabled)
2013                return;
2014
2015        if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2016                                        CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2017                return;
2018
2019        if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2020                return;
2021
2022        if (!hdev->dram_scrambler_enable)
2023                return;
2024
2025        WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2026                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2027        WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2028                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2029        WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2030                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2031        WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2032                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2033        WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2034                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2035        WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2036                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2037        WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2038                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2039        WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2040                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2041
2042        WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2043                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2044        WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2045                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2046        WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2047                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2048        WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2049                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2050        WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2051                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2052        WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2053                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2054        WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2055                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2056        WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2057                        1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2058
2059        WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2060                        1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2061        WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2062                        1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2063        WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2064                        1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2065        WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2066                        1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2067        WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2068                        1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2069        WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2070                        1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2071        WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2072                        1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2073        WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2074                        1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2075
2076        gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2077}
2078
2079static void gaudi_init_e2e(struct hl_device *hdev)
2080{
2081        if (hdev->asic_prop.fw_security_enabled)
2082                return;
2083
2084        if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2085                                        CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2086                return;
2087
2088        WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2089        WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2090        WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2091        WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2092
2093        WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2094        WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2095        WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2096        WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2097
2098        WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2099        WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2100        WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2101        WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2102
2103        WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2104        WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2105        WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2106        WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2107
2108        WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2109        WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2110        WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2111        WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2112
2113        WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2114        WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2115        WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2116        WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2117
2118        WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2119        WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2120        WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2121        WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2122
2123        WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2124        WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2125        WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2126        WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2127
2128        WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2129        WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2130        WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2131        WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2132
2133        WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2134        WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2135        WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2136        WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2137
2138        WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2139        WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2140        WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2141        WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2142
2143        WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2144        WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2145        WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2146        WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2147
2148        WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2149        WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2150        WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2151        WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2152
2153        WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2154        WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2155        WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2156        WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2157
2158        WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2159        WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2160        WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2161        WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2162
2163        WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2164        WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2165        WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2166        WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2167
2168        WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2169        WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2170        WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2171        WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2172
2173        WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2174        WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2175        WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2176        WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2177
2178        WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2179        WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2180        WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2181        WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2182
2183        WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2184        WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2185        WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2186        WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2187
2188        WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2189        WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2190        WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2191        WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2192
2193        WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2194        WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2195        WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2196        WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2197
2198        WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2199        WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2200        WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2201        WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2202
2203        WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2204        WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2205        WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2206        WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2207
2208        if (!hdev->dram_scrambler_enable) {
2209                WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2210                WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2211                WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2212                WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2213
2214                WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2215                WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2216                WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2217                WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2218
2219                WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2220                WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2221                WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2222                WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2223
2224                WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2225                WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2226                WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2227                WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2228
2229                WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2230                WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2231                WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2232                WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2233
2234                WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2235                WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2236                WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2237                WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2238
2239                WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2240                WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2241                WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2242                WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2243
2244                WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2245                WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2246                WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2247                WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2248
2249                WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2250                WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2251                WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2252                WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2253
2254                WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2255                WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2256                WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2257                WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2258
2259                WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2260                WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2261                WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2262                WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2263
2264                WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2265                WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2266                WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2267                WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2268
2269                WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2270                WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2271                WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2272                WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2273
2274                WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2275                WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2276                WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2277                WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2278
2279                WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2280                WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2281                WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2282                WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2283
2284                WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2285                WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2286                WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2287                WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2288
2289                WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2290                WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2291                WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2292                WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2293
2294                WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2295                WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2296                WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2297                WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2298
2299                WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2300                WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2301                WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2302                WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2303
2304                WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2305                WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2306                WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2307                WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2308
2309                WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2310                WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2311                WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2312                WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2313
2314                WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2315                WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2316                WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2317                WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2318
2319                WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2320                WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2321                WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2322                WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2323
2324                WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2325                WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2326                WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2327                WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2328        }
2329
2330        WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2331                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2332        WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2333                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2334
2335        WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2336                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2337        WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2338                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2339
2340        WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2341                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2342        WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2343                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2344
2345        WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2346                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2347        WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2348                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2349
2350        WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2351                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2352        WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2353                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2354
2355        WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2356                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2357        WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2358                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2359
2360        WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2361                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2362        WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2363                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2364
2365        WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2366                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2367        WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2368                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2369
2370        WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2371                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2372        WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2373                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2374
2375        WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2376                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2377        WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2378                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2379
2380        WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2381                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2382        WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2383                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2384
2385        WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2386                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2387        WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2388                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2389
2390        WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2391                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2392        WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2393                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2394
2395        WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2396                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2397        WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2398                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2399
2400        WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2401                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2402        WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2403                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2404
2405        WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2406                        1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2407        WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2408                        1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2409
2410        WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2411                        1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2412        WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2413                        1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2414
2415        WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2416                        1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2417        WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2418                        1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2419
2420        WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2421                        1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2422        WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2423                        1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2424
2425        WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2426                        1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2427        WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2428                        1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2429
2430        WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2431                        1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2432        WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2433                        1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2434
2435        WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2436                        1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2437        WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2438                        1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2439
2440        WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2441                        1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2442        WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2443                        1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2444
2445        WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2446                        1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2447        WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2448                        1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2449}
2450
2451static void gaudi_init_hbm_cred(struct hl_device *hdev)
2452{
2453        u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2454
2455        if (hdev->asic_prop.fw_security_enabled)
2456                return;
2457
2458        if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2459                                                CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2460                return;
2461
2462        hbm0_wr = 0x33333333;
2463        hbm0_rd = 0x77777777;
2464        hbm1_wr = 0x55555555;
2465        hbm1_rd = 0xDDDDDDDD;
2466
2467        WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2468        WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2469        WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2470        WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2471
2472        WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2473        WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2474        WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2475        WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2476
2477        WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2478        WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2479        WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2480        WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2481
2482        WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2483        WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2484        WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2485        WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2486
2487        WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2488                        (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2489                        (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2490        WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2491                        (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2492                        (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2493        WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2494                        (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2495                        (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2496        WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2497                        (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2498                        (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2499
2500        WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2501                        (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2502                        (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2503        WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2504                        (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2505                        (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2506        WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2507                        (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2508                        (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2509        WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2510                        (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2511                        (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2512}
2513
2514static void gaudi_init_golden_registers(struct hl_device *hdev)
2515{
2516        u32 tpc_offset;
2517        int tpc_id, i;
2518
2519        gaudi_init_e2e(hdev);
2520        gaudi_init_hbm_cred(hdev);
2521
2522        for (tpc_id = 0, tpc_offset = 0;
2523                                tpc_id < TPC_NUMBER_OF_ENGINES;
2524                                tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2525                /* Mask all arithmetic interrupts from TPC */
2526                WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
2527                /* Set 16 cache lines */
2528                WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2529                                ICACHE_FETCH_LINE_NUM, 2);
2530        }
2531
2532        /* Make sure the first 128 bytes in SRAM are 0 for Tensor DMA */
2533        for (i = 0 ; i < 128 ; i += 8)
2534                writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2535
2536        WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2537        WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2538        WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2539        WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2540}
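
/*
 * Illustration only: per-TPC configuration registers sit at a fixed stride of
 * TPC_CFG_OFFSET, so TPC k's copy of a register is addressed as in the loop
 * above, e.g.:
 *
 *	WREG32(mmTPC0_CFG_TPC_INTR_MASK + k * TPC_CFG_OFFSET, 0x8FFF);
 */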
2541
2542static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2543                                        int qman_id, dma_addr_t qman_pq_addr)
2544{
2545        struct cpu_dyn_regs *dyn_regs =
2546                        &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2547        u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2548        u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2549        u32 q_off, dma_qm_offset;
2550        u32 dma_qm_err_cfg, irq_handler_offset;
2551
2552        dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2553
2554        mtr_base_en_lo = lower_32_bits(CFG_BASE +
2555                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2556        mtr_base_en_hi = upper_32_bits(CFG_BASE +
2557                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2558        so_base_en_lo = lower_32_bits(CFG_BASE +
2559                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2560        so_base_en_hi = upper_32_bits(CFG_BASE +
2561                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2562        mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2563                                mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2564        mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2565                                mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2566        so_base_ws_lo = lower_32_bits(CFG_BASE +
2567                                mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2568        so_base_ws_hi = upper_32_bits(CFG_BASE +
2569                                mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2570
2571        q_off = dma_qm_offset + qman_id * 4;
2572
2573        WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2574        WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2575
2576        WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2577        WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2578        WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2579
2580        WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2581        WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2582                                                        QMAN_LDMA_SRC_OFFSET);
2583        WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2584                                                        QMAN_LDMA_DST_OFFSET);
2585
2586        WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2587        WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2588        WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2589        WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2590        WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2591        WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2592        WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2593        WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2594
2595        WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2596
2597        /* The following configuration is needed only once per QMAN */
2598        if (qman_id == 0) {
2599                irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2600                                mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2601                                le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2602
2603                /* Configure RAZWI IRQ */
2604                dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2605                if (hdev->stop_on_err)
2606                        dma_qm_err_cfg |=
2607                                PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2608
2609                WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2610
2611                WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2612                        lower_32_bits(CFG_BASE + irq_handler_offset));
2613                WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2614                        upper_32_bits(CFG_BASE + irq_handler_offset));
2615
2616                WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2617                        gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2618                                                                        dma_id);
2619
2620                WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2621                                QM_ARB_ERR_MSG_EN_MASK);
2622
2623                /* Increase ARB WDT to support streams architecture */
2624                WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2625                                GAUDI_ARB_WDT_TIMEOUT);
2626
2627                WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2628                                QMAN_EXTERNAL_MAKE_TRUSTED);
2629
2630                WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2631        }
2632}
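
    /*
     * A note on the register addressing used above: the per-stream registers
     * (PQ_BASE_LO_0..3, PQ_PI_0..3, ...) are consecutive 32-bit registers, 4
     * bytes apart, and each DMA QMAN block sits DMA_QMAN_OFFSET bytes after
     * the previous one. So, for instance:
     *
     *     q_off = dma_id * DMA_QMAN_OFFSET + qman_id * 4;
     *     WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
     *
     * clears the PQ producer index of stream qman_id on DMA channel dma_id.
     */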
2633
2634static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2635{
2636        struct cpu_dyn_regs *dyn_regs =
2637                        &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2638        u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2639        u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2640        u32 irq_handler_offset;
2641
2642        /* Set to maximum possible according to physical size */
2643        WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2644        WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2645
2646        /* WA for H/W bug H3-2116 */
2647        WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2648
2649        /* STOP_ON bit implies no completion of the operation in case of RAZWI */
2650        if (hdev->stop_on_err)
2651                dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2652
2653        WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2654
2655        irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2656                        mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2657                        le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2658
2659        WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2660                lower_32_bits(CFG_BASE + irq_handler_offset));
2661        WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2662                upper_32_bits(CFG_BASE + irq_handler_offset));
2663
2664        WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2665                gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2666        WREG32(mmDMA0_CORE_PROT + dma_offset,
2667                        1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2668        /* If the channel is secured, it should be in MMU bypass mode */
2669        WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2670                        1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2671        WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2672}
2673
2674static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2675                                u32 enable_mask)
2676{
2677        u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2678
2679        WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2680}
2681
2682static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2683{
2684        struct gaudi_device *gaudi = hdev->asic_specific;
2685        struct hl_hw_queue *q;
2686        int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2687
2688        if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2689                return;
2690
2691        for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2692                dma_id = gaudi_dma_assignment[i];
2693                /*
2694                 * Queues after the CPU queue need a +1 to get the correct
2695                 * queue index; the CPU EQ and NIC IRQs must also be skipped
2696                 * to get the correct MSI vector (see the example after this
2697                 * function).
                 */
2698                if (dma_id > 1) {
2699                        cpu_skip = 1;
2700                        nic_skip = NIC_NUMBER_OF_ENGINES;
2701                } else {
2702                        cpu_skip = 0;
2703                        nic_skip = 0;
2704                }
2705
2706                for (j = 0 ; j < QMAN_STREAMS ; j++) {
2707                        q_idx = 4 * dma_id + j + cpu_skip;
2708                        q = &hdev->kernel_queues[q_idx];
2709                        q->cq_id = cq_id++;
2710                        q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2711                        gaudi_init_pci_dma_qman(hdev, dma_id, j,
2712                                                q->bus_address);
2713                }
2714
2715                gaudi_init_dma_core(hdev, dma_id);
2716
2717                gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2718        }
2719
2720        gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2721}
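
    /*
     * Worked example of the index arithmetic above, assuming the usual PCI
     * DMA assignment of DMA channels 0, 1 and 5: for dma_id = 0, q_idx and
     * msi_vec simply run 0..3; for dma_id = 5, cpu_skip is 1 and nic_skip is
     * NIC_NUMBER_OF_ENGINES, so q_idx = 4 * 5 + j + 1 (the CPU queue sits in
     * the middle of the queue ID range, hence the +1) and the MSI vector
     * additionally skips the CPU EQ and the NIC interrupt vectors.
     */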
2722
2723static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2724                                        int qman_id, u64 qman_base_addr)
2725{
2726        struct cpu_dyn_regs *dyn_regs =
2727                        &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2728        u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2729        u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2730        u32 dma_qm_err_cfg, irq_handler_offset;
2731        u32 q_off, dma_qm_offset;
2732
2733        dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2734
2735        mtr_base_en_lo = lower_32_bits(CFG_BASE +
2736                        mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2737        mtr_base_en_hi = upper_32_bits(CFG_BASE +
2738                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2739        so_base_en_lo = lower_32_bits(CFG_BASE +
2740                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2741        so_base_en_hi = upper_32_bits(CFG_BASE +
2742                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2743        mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2744                                mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2745        mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2746                                mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2747        so_base_ws_lo = lower_32_bits(CFG_BASE +
2748                                mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2749        so_base_ws_hi = upper_32_bits(CFG_BASE +
2750                                mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2751
2752        q_off = dma_qm_offset + qman_id * 4;
2753
2754        if (qman_id < 4) {
2755                WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2756                                        lower_32_bits(qman_base_addr));
2757                WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2758                                        upper_32_bits(qman_base_addr));
2759
2760                WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2761                WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2762                WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2763
2764                WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2765                                                        QMAN_CPDMA_SIZE_OFFSET);
2766                WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2767                                                        QMAN_CPDMA_SRC_OFFSET);
2768                WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2769                                                        QMAN_CPDMA_DST_OFFSET);
2770        } else {
2771                irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2772                                mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2773                                le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2774
2775                WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2776                                                        QMAN_LDMA_SIZE_OFFSET);
2777                WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2778                                                        QMAN_LDMA_SRC_OFFSET);
2779                WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2780                                                        QMAN_LDMA_DST_OFFSET);
2781
2782                /* Configure RAZWI IRQ */
2783                dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2784                if (hdev->stop_on_err)
2785                        dma_qm_err_cfg |=
2786                                HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2787
2788                WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2789
2790                WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2791                        lower_32_bits(CFG_BASE + irq_handler_offset));
2792                WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2793                        upper_32_bits(CFG_BASE + irq_handler_offset));
2794
2795                WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2796                        gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2797                                                                        dma_id);
2798
2799                WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2800                                QM_ARB_ERR_MSG_EN_MASK);
2801
2802                /* Increase ARB WDT to support streams architecture */
2803                WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2804                                GAUDI_ARB_WDT_TIMEOUT);
2805
2806                WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2807                WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2808                                QMAN_INTERNAL_MAKE_TRUSTED);
2809        }
2810
2811        WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2812        WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2813        WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2814        WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2815
2816        /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2817        if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2818                WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2819                                mtr_base_ws_lo);
2820                WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2821                                mtr_base_ws_hi);
2822                WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2823                                so_base_ws_lo);
2824                WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2825                                so_base_ws_hi);
2826        }
2827}
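
    /*
     * The qman_id < 4 split above distinguishes the four stream (upper CP)
     * queues, each backed by a PQ allocated by the driver and using the
     * QMAN_CPDMA_* offsets, from the lower CP (qman_id 4), which has no PQ of
     * its own, uses the QMAN_LDMA_* offsets and also carries the per-QMAN
     * RAZWI/arbitration error configuration.
     */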
2828
2829static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2830{
2831        struct gaudi_device *gaudi = hdev->asic_specific;
2832        struct gaudi_internal_qman_info *q;
2833        u64 qman_base_addr;
2834        int i, j, dma_id, internal_q_index;
2835
2836        if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2837                return;
2838
2839        for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2840                dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2841
2842                for (j = 0 ; j < QMAN_STREAMS ; j++) {
2843                         /*
2844                          * Add the CPU queue in order to get the correct queue
2845                          * number, as all internal queues are placed after it
2846                          */
2847                        internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2848
2849                        q = &gaudi->internal_qmans[internal_q_index];
2850                        qman_base_addr = (u64) q->pq_dma_addr;
2851                        gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2852                                                qman_base_addr);
2853                }
2854
2855                /* Initializing lower CP for HBM DMA QMAN */
2856                gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2857
2858                gaudi_init_dma_core(hdev, dma_id);
2859
2860                gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2861        }
2862
2863        gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2864}
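
    /*
     * Example of the internal queue indexing above (QMAN_STREAMS being 4):
     * for dma_id = 2 and stream j = 0, internal_q_index = 2 * 4 + 0 + 1 = 9,
     * i.e. the first slot after the DMA0/DMA1 stream queues and the CPU
     * queue. The lower CP of each HBM DMA QMAN is then initialized with a
     * zero base address since it has no PQ.
     */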
2865
2866static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2867                                        int qman_id, u64 qman_base_addr)
2868{
2869        struct cpu_dyn_regs *dyn_regs =
2870                        &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2871        u32 mtr_base_lo, mtr_base_hi;
2872        u32 so_base_lo, so_base_hi;
2873        u32 irq_handler_offset;
2874        u32 q_off, mme_id;
2875        u32 mme_qm_err_cfg;
2876
2877        mtr_base_lo = lower_32_bits(CFG_BASE +
2878                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2879        mtr_base_hi = upper_32_bits(CFG_BASE +
2880                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2881        so_base_lo = lower_32_bits(CFG_BASE +
2882                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2883        so_base_hi = upper_32_bits(CFG_BASE +
2884                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2885
2886        q_off = mme_offset + qman_id * 4;
2887
2888        if (qman_id < 4) {
2889                WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2890                                        lower_32_bits(qman_base_addr));
2891                WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2892                                        upper_32_bits(qman_base_addr));
2893
2894                WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2895                WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2896                WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2897
2898                WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2899                                                        QMAN_CPDMA_SIZE_OFFSET);
2900                WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2901                                                        QMAN_CPDMA_SRC_OFFSET);
2902                WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2903                                                        QMAN_CPDMA_DST_OFFSET);
2904        } else {
2905                irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2906                                mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2907                                le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
2908
2909                WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2910                                                        QMAN_LDMA_SIZE_OFFSET);
2911                WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2912                                                        QMAN_LDMA_SRC_OFFSET);
2913                WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2914                                                        QMAN_LDMA_DST_OFFSET);
2915
2916                /* Configure RAZWI IRQ */
2917                mme_id = mme_offset /
2918                                (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
2919
2920                mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2921                if (hdev->stop_on_err)
2922                        mme_qm_err_cfg |=
2923                                MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2924
2925                WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2926
2927                WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2928                        lower_32_bits(CFG_BASE + irq_handler_offset));
2929                WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2930                        upper_32_bits(CFG_BASE + irq_handler_offset));
2931
2932                WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2933                        gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2934                                                                        mme_id);
2935
2936                WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2937                                QM_ARB_ERR_MSG_EN_MASK);
2938
2939                /* Increase ARB WDT to support streams architecture */
2940                WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
2941                                GAUDI_ARB_WDT_TIMEOUT);
2942
2943                WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2944                WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2945                                QMAN_INTERNAL_MAKE_TRUSTED);
2946        }
2947
2948        WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2949        WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2950        WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2951        WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2952}
2953
2954static void gaudi_init_mme_qmans(struct hl_device *hdev)
2955{
2956        struct gaudi_device *gaudi = hdev->asic_specific;
2957        struct gaudi_internal_qman_info *q;
2958        u64 qman_base_addr;
2959        u32 mme_offset;
2960        int i, internal_q_index;
2961
2962        if (gaudi->hw_cap_initialized & HW_CAP_MME)
2963                return;
2964
2965        /*
2966         * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2967         * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2968         */
2969
2970        mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2971
2972        for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2973                internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2974                q = &gaudi->internal_qmans[internal_q_index];
2975                qman_base_addr = (u64) q->pq_dma_addr;
2976                gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2977                                        qman_base_addr);
2978                if (i == 3)
2979                        mme_offset = 0;
2980        }
2981
2982        /* Initializing lower CP for MME QMANs */
2983        mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2984        gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2985        gaudi_init_mme_qman(hdev, 0, 4, 0);
2986
2987        WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2988        WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2989
2990        gaudi->hw_cap_initialized |= HW_CAP_MME;
2991}
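
    /*
     * In the loop above, iterations 0-3 program the four streams of the
     * north-west MME QMAN (mmMME2_QM) and, once i == 3 resets mme_offset to
     * 0, iterations 4-7 program the south-west MME QMAN (mmMME0_QM). The
     * lower CP of each QMAN is then initialized separately with qman_id 4
     * and no PQ.
     */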
2992
2993static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2994                                int qman_id, u64 qman_base_addr)
2995{
2996        struct cpu_dyn_regs *dyn_regs =
2997                        &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2998        u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2999        u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3000        u32 tpc_qm_err_cfg, irq_handler_offset;
3001        u32 q_off, tpc_id;
3002
3003        mtr_base_en_lo = lower_32_bits(CFG_BASE +
3004                        mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3005        mtr_base_en_hi = upper_32_bits(CFG_BASE +
3006                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3007        so_base_en_lo = lower_32_bits(CFG_BASE +
3008                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3009        so_base_en_hi = upper_32_bits(CFG_BASE +
3010                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3011        mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3012                                mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3013        mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3014                                mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3015        so_base_ws_lo = lower_32_bits(CFG_BASE +
3016                                mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3017        so_base_ws_hi = upper_32_bits(CFG_BASE +
3018                                mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3019
3020        q_off = tpc_offset + qman_id * 4;
3021
3022        tpc_id = tpc_offset /
3023                        (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3024
3025        if (qman_id < 4) {
3026                WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3027                                        lower_32_bits(qman_base_addr));
3028                WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3029                                        upper_32_bits(qman_base_addr));
3030
3031                WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3032                WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3033                WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3034
3035                WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3036                                                        QMAN_CPDMA_SIZE_OFFSET);
3037                WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3038                                                        QMAN_CPDMA_SRC_OFFSET);
3039                WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3040                                                        QMAN_CPDMA_DST_OFFSET);
3041        } else {
3042                irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3043                                mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3044                                le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3045
3046                WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3047                                                        QMAN_LDMA_SIZE_OFFSET);
3048                WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3049                                                        QMAN_LDMA_SRC_OFFSET);
3050                WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3051                                                        QMAN_LDMA_DST_OFFSET);
3052
3053                /* Configure RAZWI IRQ */
3054                tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3055                if (hdev->stop_on_err)
3056                        tpc_qm_err_cfg |=
3057                                TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3058
3059                WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3060
3061                WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3062                        lower_32_bits(CFG_BASE + irq_handler_offset));
3063                WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3064                        upper_32_bits(CFG_BASE + irq_handler_offset));
3065
3066                WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3067                        gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3068                                                                        tpc_id);
3069
3070                WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3071                                QM_ARB_ERR_MSG_EN_MASK);
3072
3073                /* Increase ARB WDT to support streams architecture */
3074                WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
3075                                GAUDI_ARB_WDT_TIMEOUT);
3076
3077                WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3078                WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3079                                QMAN_INTERNAL_MAKE_TRUSTED);
3080        }
3081
3082        WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3083        WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3084        WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3085        WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3086
3087        /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3088        if (tpc_id == 6) {
3089                WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3090                                mtr_base_ws_lo);
3091                WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3092                                mtr_base_ws_hi);
3093                WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3094                                so_base_ws_lo);
3095                WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3096                                so_base_ws_hi);
3097        }
3098}
3099
3100static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3101{
3102        struct gaudi_device *gaudi = hdev->asic_specific;
3103        struct gaudi_internal_qman_info *q;
3104        u64 qman_base_addr;
3105        u32 so_base_hi, tpc_offset = 0;
3106        u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3107                        mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3108        int i, tpc_id, internal_q_index;
3109
3110        if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3111                return;
3112
3113        so_base_hi = upper_32_bits(CFG_BASE +
3114                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3115
3116        for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3117                for (i = 0 ; i < QMAN_STREAMS ; i++) {
3118                        internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3119                                                tpc_id * QMAN_STREAMS + i;
3120                        q = &gaudi->internal_qmans[internal_q_index];
3121                        qman_base_addr = (u64) q->pq_dma_addr;
3122                        gaudi_init_tpc_qman(hdev, tpc_offset, i,
3123                                                qman_base_addr);
3124
3125                        if (i == 3) {
3126                                /* Initializing lower CP for TPC QMAN */
3127                                gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3128
3129                                /* Enable the QMAN and TPC channel */
3130                                WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3131                                                QMAN_TPC_ENABLE);
3132                        }
3133                }
3134
3135                WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3136                                so_base_hi);
3137
3138                tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3139
3140                gaudi->hw_cap_initialized |=
3141                                FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3142        }
3143}
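
    /*
     * Each TPC gets its own bit inside HW_CAP_TPC_MASK, so partial bring-up
     * can be reflected precisely. For example (illustrative only), after TPC0
     * and TPC1 are initialized:
     *
     *     gaudi->hw_cap_initialized |= FIELD_PREP(HW_CAP_TPC_MASK, 1 << 0);
     *     gaudi->hw_cap_initialized |= FIELD_PREP(HW_CAP_TPC_MASK, 1 << 1);
     */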
3144
3145static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3146                                int qman_id, u64 qman_base_addr, int nic_id)
3147{
3148        struct cpu_dyn_regs *dyn_regs =
3149                        &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3150        u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3151        u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3152        u32 nic_qm_err_cfg, irq_handler_offset;
3153        u32 q_off;
3154
3155        mtr_base_en_lo = lower_32_bits(CFG_BASE +
3156                        mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3157        mtr_base_en_hi = upper_32_bits(CFG_BASE +
3158                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3159        so_base_en_lo = lower_32_bits(CFG_BASE +
3160                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3161        so_base_en_hi = upper_32_bits(CFG_BASE +
3162                                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3163        mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3164                                mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3165        mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3166                                mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3167        so_base_ws_lo = lower_32_bits(CFG_BASE +
3168                                mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3169        so_base_ws_hi = upper_32_bits(CFG_BASE +
3170                                mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3171
3172        q_off = nic_offset + qman_id * 4;
3173
3174        WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3175        WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3176
3177        WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3178        WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3179        WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3180
3181        WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3182                                                        QMAN_LDMA_SIZE_OFFSET);
3183        WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3184                                                        QMAN_LDMA_SRC_OFFSET);
3185        WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3186                                                        QMAN_LDMA_DST_OFFSET);
3187
3188        WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3189        WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3190        WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3191        WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3192
3193        /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3194        WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3195        WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3196        WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3197        WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3198
3199        if (qman_id == 0) {
3200                irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3201                                mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3202                                le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3203
3204                /* Configure RAZWI IRQ */
3205                nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3206                if (hdev->stop_on_err)
3207                        nic_qm_err_cfg |=
3208                                NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3209
3210                WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3211
3212                WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3213                        lower_32_bits(CFG_BASE + irq_handler_offset));
3214                WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3215                        upper_32_bits(CFG_BASE + irq_handler_offset));
3216
3217                WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3218                        gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3219                                                                        nic_id);
3220
3221                WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3222                                QM_ARB_ERR_MSG_EN_MASK);
3223
3224                /* Increase ARB WDT to support streams architecture */
3225                WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
3226                                GAUDI_ARB_WDT_TIMEOUT);
3227
3228                WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3229                WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3230                                QMAN_INTERNAL_MAKE_TRUSTED);
3231        }
3232}
3233
3234static void gaudi_init_nic_qmans(struct hl_device *hdev)
3235{
3236        struct gaudi_device *gaudi = hdev->asic_specific;
3237        struct gaudi_internal_qman_info *q;
3238        u64 qman_base_addr;
3239        u32 nic_offset = 0;
3240        u32 nic_delta_between_qmans =
3241                        mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3242        u32 nic_delta_between_nics =
3243                        mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3244        int i, nic_id, internal_q_index;
3245
3246        if (!hdev->nic_ports_mask)
3247                return;
3248
3249        if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3250                return;
3251
3252        dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3253
3254        for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3255                if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3256                        nic_offset += nic_delta_between_qmans;
3257                        if (nic_id & 1) {
3258                                nic_offset -= (nic_delta_between_qmans * 2);
3259                                nic_offset += nic_delta_between_nics;
3260                        }
3261                        continue;
3262                }
3263
3264                for (i = 0 ; i < QMAN_STREAMS ; i++) {
3265                        internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3266                                                nic_id * QMAN_STREAMS + i;
3267                        q = &gaudi->internal_qmans[internal_q_index];
3268                        qman_base_addr = (u64) q->pq_dma_addr;
3269                        gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3270                                                qman_base_addr, nic_id);
3271                }
3272
3273                /* Enable the QMAN */
3274                WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3275
3276                nic_offset += nic_delta_between_qmans;
3277                if (nic_id & 1) {
3278                        nic_offset -= (nic_delta_between_qmans * 2);
3279                        nic_offset += nic_delta_between_nics;
3280                }
3281
3282                gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3283        }
3284}
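
    /*
     * The nic_offset bookkeeping above reflects the fact that each NIC macro
     * contains two QMANs: after every QMAN the offset advances by
     * nic_delta_between_qmans, and after the second (odd) QMAN of a macro it
     * is rewound by two QMAN strides and advanced by nic_delta_between_nics,
     * i.e. to the first QMAN of the next NIC macro. Ports masked out in
     * nic_ports_mask are skipped but the offset is still advanced so the
     * mapping stays correct.
     */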
3285
3286static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3287{
3288        struct gaudi_device *gaudi = hdev->asic_specific;
3289
3290        if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3291                return;
3292
3293        WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3294        WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3295        WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3296}
3297
3298static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3299{
3300        struct gaudi_device *gaudi = hdev->asic_specific;
3301
3302        if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3303                return;
3304
3305        WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3306        WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3307        WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3308        WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3309        WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3310}
3311
3312static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3313{
3314        struct gaudi_device *gaudi = hdev->asic_specific;
3315
3316        if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3317                return;
3318
3319        WREG32(mmMME2_QM_GLBL_CFG0, 0);
3320        WREG32(mmMME0_QM_GLBL_CFG0, 0);
3321}
3322
3323static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3324{
3325        struct gaudi_device *gaudi = hdev->asic_specific;
3326        u32 tpc_offset = 0;
3327        int tpc_id;
3328
3329        if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3330                return;
3331
3332        for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3333                WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3334                tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3335        }
3336}
3337
3338static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3339{
3340        struct gaudi_device *gaudi = hdev->asic_specific;
3341        u32 nic_mask, nic_offset = 0;
3342        u32 nic_delta_between_qmans =
3343                        mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3344        u32 nic_delta_between_nics =
3345                        mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3346        int nic_id;
3347
3348        for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3349                nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3350
3351                if (gaudi->hw_cap_initialized & nic_mask)
3352                        WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3353
3354                nic_offset += nic_delta_between_qmans;
3355                if (nic_id & 1) {
3356                        nic_offset -= (nic_delta_between_qmans * 2);
3357                        nic_offset += nic_delta_between_nics;
3358                }
3359        }
3360}
3361
3362static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3363{
3364        struct gaudi_device *gaudi = hdev->asic_specific;
3365
3366        if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3367                return;
3368
3369        /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3370        WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3371        WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3372        WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3373}
3374
3375static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3376{
3377        struct gaudi_device *gaudi = hdev->asic_specific;
3378
3379        if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3380                return;
3381
3382        /* Stop CPs of HBM DMA QMANs */
3383
3384        WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3385        WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3386        WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3387        WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3388        WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3389}
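
    /*
     * The CP_STOP field holds one bit per CP: bits 0-3 correspond to the four
     * upper (stream) CPs and bit 4 to the lower CP. Hence 0xF above stops
     * only the upper CPs of the PCI DMA QMANs, while 0x1F stops all five CPs
     * of the HBM DMA QMANs.
     */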
3390
3391static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3392{
3393        struct gaudi_device *gaudi = hdev->asic_specific;
3394
3395        if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3396                return;
3397
3398        /* Stop CPs of MME QMANs */
3399        WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3400        WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3401}
3402
3403static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3404{
3405        struct gaudi_device *gaudi = hdev->asic_specific;
3406
3407        if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3408                return;
3409
3410        WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3411        WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3412        WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3413        WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3414        WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3415        WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3416        WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3417        WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3418}
3419
3420static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3421{
3422        struct gaudi_device *gaudi = hdev->asic_specific;
3423
3424        /* Stop upper CPs of QMANs */
3425
3426        if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3427                WREG32(mmNIC0_QM0_GLBL_CFG1,
3428                                NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3429                                NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3430                                NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3431
3432        if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3433                WREG32(mmNIC0_QM1_GLBL_CFG1,
3434                                NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3435                                NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3436                                NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3437
3438        if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3439                WREG32(mmNIC1_QM0_GLBL_CFG1,
3440                                NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3441                                NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3442                                NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3443
3444        if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3445                WREG32(mmNIC1_QM1_GLBL_CFG1,
3446                                NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3447                                NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3448                                NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3449
3450        if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3451                WREG32(mmNIC2_QM0_GLBL_CFG1,
3452                                NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3453                                NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3454                                NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3455
3456        if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3457                WREG32(mmNIC2_QM1_GLBL_CFG1,
3458                                NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3459                                NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3460                                NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3461
3462        if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3463                WREG32(mmNIC3_QM0_GLBL_CFG1,
3464                                NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3465                                NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3466                                NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3467
3468        if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3469                WREG32(mmNIC3_QM1_GLBL_CFG1,
3470                                NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3471                                NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3472                                NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3473
3474        if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3475                WREG32(mmNIC4_QM0_GLBL_CFG1,
3476                                NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3477                                NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3478                                NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3479
3480        if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3481                WREG32(mmNIC4_QM1_GLBL_CFG1,
3482                                NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3483                                NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3484                                NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3485}
3486
3487static void gaudi_pci_dma_stall(struct hl_device *hdev)
3488{
3489        struct gaudi_device *gaudi = hdev->asic_specific;
3490
3491        if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3492                return;
3493
3494        WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3495        WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3496        WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3497}
3498
3499static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3500{
3501        struct gaudi_device *gaudi = hdev->asic_specific;
3502
3503        if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3504                return;
3505
3506        WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3507        WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3508        WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3509        WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3510        WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3511}
3512
3513static void gaudi_mme_stall(struct hl_device *hdev)
3514{
3515        struct gaudi_device *gaudi = hdev->asic_specific;
3516
3517        if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3518                return;
3519
3520        /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3521        WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3522        WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3523        WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3524        WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3525        WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3526        WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3527        WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3528        WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3529        WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3530        WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3531        WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3532        WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3533        WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3534        WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3535        WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3536        WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3537}
3538
3539static void gaudi_tpc_stall(struct hl_device *hdev)
3540{
3541        struct gaudi_device *gaudi = hdev->asic_specific;
3542
3543        if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3544                return;
3545
3546        WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3547        WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3548        WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3549        WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3550        WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3551        WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3552        WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3553        WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3554}
3555
3556static void gaudi_set_clock_gating(struct hl_device *hdev)
3557{
3558        struct gaudi_device *gaudi = hdev->asic_specific;
3559        u32 qman_offset;
3560        bool enable;
3561        int i;
3562
3563        /* If a debug session is in progress, don't enable clock gating
3564         * as it may interfere with it
3565         */
3566        if (hdev->in_debug)
3567                return;
3568
3569        if (hdev->asic_prop.fw_security_enabled)
3570                return;
3571
3572        for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
3573                enable = !!(hdev->clock_gating_mask &
3574                                (BIT_ULL(gaudi_dma_assignment[i])));
3575
3576                qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3577                WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3578                                enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3579                WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3580                                enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
3581        }
3582
3583        for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
3584                enable = !!(hdev->clock_gating_mask &
3585                                (BIT_ULL(gaudi_dma_assignment[i])));
3586
3587                /* The GC sends work to the DMA engine through the upper CP
3588                 * in DMA5, so we must not enable clock gating on that DMA
3589                 */
3590                if (i == GAUDI_HBM_DMA_4)
3591                        enable = 0;
3592
3593                qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3594                WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3595                                enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3596                WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3597                                enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3598        }
3599
3600        enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
3601        WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3602        WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3603
3604        enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
3605        WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3606        WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3607
3608        for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3609                enable = !!(hdev->clock_gating_mask &
3610                                (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
3611
3612                WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
3613                                enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3614                WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
3615                                enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3616
3617                qman_offset += TPC_QMAN_OFFSET;
3618        }
3619
3620        gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
3621}
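
    /*
     * clock_gating_mask is a per-engine bitmask indexed by GAUDI_ENGINE_ID_*.
     * For example (illustrative only), a mask that enables clock gating on
     * TPC0 and MME0 only would be built as:
     *
     *     hdev->clock_gating_mask = BIT_ULL(GAUDI_ENGINE_ID_TPC_0) |
     *                               BIT_ULL(GAUDI_ENGINE_ID_MME_0);
     */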
3622
3623static void gaudi_disable_clock_gating(struct hl_device *hdev)
3624{
3625        struct gaudi_device *gaudi = hdev->asic_specific;
3626        u32 qman_offset;
3627        int i;
3628
3629        if (hdev->asic_prop.fw_security_enabled)
3630                return;
3631
3632        for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3633                WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3634                WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3635
3636                qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3637        }
3638
3639        WREG32(mmMME0_QM_CGM_CFG, 0);
3640        WREG32(mmMME0_QM_CGM_CFG1, 0);
3641        WREG32(mmMME2_QM_CGM_CFG, 0);
3642        WREG32(mmMME2_QM_CGM_CFG1, 0);
3643
3644        for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3645                WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3646                WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3647
3648                qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3649        }
3650
3651        gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
3652}
3653
3654static void gaudi_enable_timestamp(struct hl_device *hdev)
3655{
3656        /* Disable the timestamp counter */
3657        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3658
3659        /* Zero the lower/upper parts of the 64-bit counter */
3660        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3661        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3662
3663        /* Enable the counter */
3664        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3665}
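
    /*
     * The sequence above is disable -> clear both 32-bit halves -> enable,
     * so the free-running timestamp counter always restarts from zero.
     */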
3666
3667static void gaudi_disable_timestamp(struct hl_device *hdev)
3668{
3669        /* Disable the timestamp counter */
3670        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3671}
3672
3673static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
3674{
3675        u32 wait_timeout_ms;
3676
3677        dev_info(hdev->dev,
3678                "Halting compute engines and disabling interrupts\n");
3679
3680        if (hdev->pldm)
3681                wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3682        else
3683                wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3684
3685        gaudi_stop_nic_qmans(hdev);
3686        gaudi_stop_mme_qmans(hdev);
3687        gaudi_stop_tpc_qmans(hdev);
3688        gaudi_stop_hbm_dma_qmans(hdev);
3689        gaudi_stop_pci_dma_qmans(hdev);
3690
3691        hdev->asic_funcs->disable_clock_gating(hdev);
3692
3693        msleep(wait_timeout_ms);
3694
3695        gaudi_pci_dma_stall(hdev);
3696        gaudi_hbm_dma_stall(hdev);
3697        gaudi_tpc_stall(hdev);
3698        gaudi_mme_stall(hdev);
3699
3700        msleep(wait_timeout_ms);
3701
3702        gaudi_disable_nic_qmans(hdev);
3703        gaudi_disable_mme_qmans(hdev);
3704        gaudi_disable_tpc_qmans(hdev);
3705        gaudi_disable_hbm_dma_qmans(hdev);
3706        gaudi_disable_pci_dma_qmans(hdev);
3707
3708        gaudi_disable_timestamp(hdev);
3709
3710        gaudi_disable_msi(hdev);
3711}
3712
3713static int gaudi_mmu_init(struct hl_device *hdev)
3714{
3715        struct asic_fixed_properties *prop = &hdev->asic_prop;
3716        struct gaudi_device *gaudi = hdev->asic_specific;
3717        u64 hop0_addr;
3718        int rc, i;
3719
3720        if (!hdev->mmu_enable)
3721                return 0;
3722
3723        if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3724                return 0;
3725
3726        for (i = 0 ; i < prop->max_asid ; i++) {
3727                hop0_addr = prop->mmu_pgt_addr +
3728                                (i * prop->mmu_hop_table_size);
3729
3730                rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3731                if (rc) {
3732                        dev_err(hdev->dev,
3733                                "failed to set hop0 addr for asid %d\n", i);
3734                        goto err;
3735                }
3736        }
3737
3738        /* init MMU cache management page */
3739        WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3740        WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3741
3742        hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
3743
3744        WREG32(mmMMU_UP_MMU_ENABLE, 1);
3745        WREG32(mmMMU_UP_SPI_MASK, 0xF);
3746
3747        WREG32(mmSTLB_HOP_CONFIGURATION,
3748                        hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3749
3750        /*
3751         * The H/W expects the first PI after init to be 1. After wraparound
3752         * we'll write 0.
3753         */
3754        gaudi->mmu_cache_inv_pi = 1;
3755
3756        gaudi->hw_cap_initialized |= HW_CAP_MMU;
3757
3758        return 0;
3759
3760err:
3761        return rc;
3762}
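
    /*
     * Each ASID owns its own hop0 table inside the MMU page-table region,
     * laid out back to back, so for example ASID 3 gets
     *
     *     hop0_addr = prop->mmu_pgt_addr + 3 * prop->mmu_hop_table_size;
     *
     * which is exactly what the loop above programs for every possible ASID.
     */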
3763
3764static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3765{
3766        void __iomem *dst;
3767
3768        dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3769
3770        return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3771}
3772
3773static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3774{
3775        void __iomem *dst;
3776
3777        dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3778
3779        return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3780}
3781
3782static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3783{
3784        struct dynamic_fw_load_mgr *dynamic_loader;
3785        struct cpu_dyn_regs *dyn_regs;
3786
3787        dynamic_loader = &hdev->fw_loader.dynamic_loader;
3788
3789        /*
3790         * Update the initial values of a few specific dynamic registers.
3791         * Before the first descriptor is read from the FW, these values
3792         * have to be hard-coded. In later stages of the protocol they are
3793         * updated automatically by reading the FW descriptor, so the data
3794         * there is always up-to-date.
3795         */
3796        dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3797        dyn_regs->kmd_msg_to_cpu =
3798                                cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3799        dyn_regs->cpu_cmd_status_to_host =
3800                                cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3801
3802        dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3803}
3804
3805static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3806{
3807        struct static_fw_load_mgr *static_loader;
3808
3809        static_loader = &hdev->fw_loader.static_loader;
3810
3811        static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3812        static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3813        static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3814        static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3815        static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3816        static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3817        static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3818        static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3819        static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3820        static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3821        static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3822        static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3823        static_loader->cpu_reset_wait_msec = hdev->pldm ?
3824                        GAUDI_PLDM_RESET_WAIT_MSEC :
3825                        GAUDI_CPU_RESET_WAIT_MSEC;
3826}
3827
3828static void gaudi_init_firmware_loader(struct hl_device *hdev)
3829{
3830        struct asic_fixed_properties *prop = &hdev->asic_prop;
3831        struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3832
3833        /* fill common fields */
3834        fw_loader->linux_loaded = false;
3835        fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3836        fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3837        fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3838        fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3839        fw_loader->skip_bmc = !hdev->bmc_enable;
3840        fw_loader->sram_bar_id = SRAM_BAR_ID;
3841        fw_loader->dram_bar_id = HBM_BAR_ID;
3842
3843        if (prop->dynamic_fw_load)
3844                gaudi_init_dynamic_firmware_loader(hdev);
3845        else
3846                gaudi_init_static_firmware_loader(hdev);
3847}
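
    /*
     * The choice between the two loaders is driven by the firmware itself:
     * when prop->dynamic_fw_load is set, register locations are taken from
     * the descriptor published by the FW (cpu_dyn_regs); otherwise the fixed
     * register map programmed in gaudi_init_static_firmware_loader() is used.
     */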
3848
3849static int gaudi_init_cpu(struct hl_device *hdev)
3850{
3851        struct gaudi_device *gaudi = hdev->asic_specific;
3852        int rc;
3853
3854        if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3855                return 0;
3856
3857        if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3858                return 0;
3859
3860        /*
3861         * The device CPU works with 40-bit addresses.
3862         * This register sets the extension to 50 bits.
3863         */
3864        if (!hdev->asic_prop.fw_security_enabled)
3865                WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3866
3867        rc = hl_fw_init_cpu(hdev);
3868
3869        if (rc)
3870                return rc;
3871
3872        gaudi->hw_cap_initialized |= HW_CAP_CPU;
3873
3874        return 0;
3875}
3876
3877static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3878{
3879        struct cpu_dyn_regs *dyn_regs =
3880                        &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3881        struct asic_fixed_properties *prop = &hdev->asic_prop;
3882        struct gaudi_device *gaudi = hdev->asic_specific;
3883        u32 status, irq_handler_offset;
3884        struct hl_eq *eq;
3885        struct hl_hw_queue *cpu_pq =
3886                        &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3887        int err;
3888
3889        if (!hdev->cpu_queues_enable)
3890                return 0;
3891
3892        if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3893                return 0;
3894
3895        eq = &hdev->event_queue;
3896
3897        WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3898        WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3899
3900        WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3901        WREG32(mmCPU