linux/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>

#include <drm/drm_atomic_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "nv.h"
#include "bif/bif_4_1_d.h"
#include <linux/pci.h>
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"
#include "amdgpu_fru_eeprom.h"
#include "amdgpu_reset.h"

#include <linux/suspend.h>
#include <drm/task_barrier.h>
#include <linux/pm_runtime.h>

#include <drm/drm_drv.h>

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vangogh_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/yellow_carp_gpu_info.bin");

#define AMDGPU_RESUME_MS                2000

const char *amdgpu_asic_name[] = {
        "TAHITI",
        "PITCAIRN",
        "VERDE",
        "OLAND",
        "HAINAN",
        "BONAIRE",
        "KAVERI",
        "KABINI",
        "HAWAII",
        "MULLINS",
        "TOPAZ",
        "TONGA",
        "FIJI",
        "CARRIZO",
        "STONEY",
        "POLARIS10",
        "POLARIS11",
        "POLARIS12",
        "VEGAM",
        "VEGA10",
        "VEGA12",
        "VEGA20",
        "RAVEN",
        "ARCTURUS",
        "RENOIR",
        "ALDEBARAN",
        "NAVI10",
        "NAVI14",
        "NAVI12",
        "SIENNA_CICHLID",
        "NAVY_FLOUNDER",
        "VANGOGH",
        "DIMGREY_CAVEFISH",
        "BEIGE_GOBY",
        "YELLOW_CARP",
        "LAST",
};

/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as the sum of the NAKs generated and NAKs received.
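 *
 * Usage sketch (the sysfs path is illustrative; the card index varies
 * per system):
 *
 *   cat /sys/class/drm/card0/device/pcie_replay_count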
 */

static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
                struct device_attribute *attr, char *buf)
{
        struct drm_device *ddev = dev_get_drvdata(dev);
        struct amdgpu_device *adev = drm_to_adev(ddev);
        uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);

        return sysfs_emit(buf, "%llu\n", cnt);
}

static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
                amdgpu_device_get_pcie_replay_count, NULL);

static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * DOC: product_name
 *
 * The amdgpu driver provides a sysfs API for reporting the product name
 * for the device.
 * The file product_name is used for this and returns the product name
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards.
 */

static ssize_t amdgpu_device_get_product_name(struct device *dev,
                struct device_attribute *attr, char *buf)
{
        struct drm_device *ddev = dev_get_drvdata(dev);
        struct amdgpu_device *adev = drm_to_adev(ddev);

        return sysfs_emit(buf, "%s\n", adev->product_name);
}

static DEVICE_ATTR(product_name, S_IRUGO,
                amdgpu_device_get_product_name, NULL);

/**
 * DOC: product_number
 *
 * The amdgpu driver provides a sysfs API for reporting the part number
 * for the device.
 * The file product_number is used for this and returns the part number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards.
 */

static ssize_t amdgpu_device_get_product_number(struct device *dev,
                struct device_attribute *attr, char *buf)
{
        struct drm_device *ddev = dev_get_drvdata(dev);
        struct amdgpu_device *adev = drm_to_adev(ddev);

        return sysfs_emit(buf, "%s\n", adev->product_number);
}

static DEVICE_ATTR(product_number, S_IRUGO,
                amdgpu_device_get_product_number, NULL);

/**
 * DOC: serial_number
 *
 * The amdgpu driver provides a sysfs API for reporting the serial number
 * for the device.
 * The file serial_number is used for this and returns the serial number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards.
 */

static ssize_t amdgpu_device_get_serial_number(struct device *dev,
                struct device_attribute *attr, char *buf)
{
        struct drm_device *ddev = dev_get_drvdata(dev);
        struct amdgpu_device *adev = drm_to_adev(ddev);

        return sysfs_emit(buf, "%s\n", adev->serial);
}

static DEVICE_ATTR(serial_number, S_IRUGO,
                amdgpu_device_get_serial_number, NULL);

/**
 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with ATPX power control,
 * otherwise returns false.
 */
bool amdgpu_device_supports_px(struct drm_device *dev)
{
        struct amdgpu_device *adev = drm_to_adev(dev);

        if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
                return true;
        return false;
}

/**
 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with ACPI power control,
 * otherwise returns false.
 */
bool amdgpu_device_supports_boco(struct drm_device *dev)
{
        struct amdgpu_device *adev = drm_to_adev(dev);

        if (adev->has_pr3 ||
            ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
                return true;
        return false;
}

/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device supports BACO,
 * otherwise returns false.
 */
bool amdgpu_device_supports_baco(struct drm_device *dev)
{
        struct amdgpu_device *adev = drm_to_adev(dev);

        return amdgpu_asic_supports_baco(adev);
}

/**
 * amdgpu_device_supports_smart_shift - Is the device a dGPU with
 * Smart Shift support
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with Smart Shift support,
 * otherwise returns false.
 */
bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
{
        return (amdgpu_device_supports_boco(dev) &&
                amdgpu_acpi_is_power_shift_control_supported());
}

/*
 * VRAM access helper functions
 */

/**
 * amdgpu_device_vram_access - read/write a buffer in vram
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size in bytes; @buf must be at least @size bytes long
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
                               uint32_t *buf, size_t size, bool write)
{
        unsigned long flags;
        uint32_t hi = ~0;
        uint64_t last;
        int idx;

        if (!drm_dev_enter(&adev->ddev, &idx))
                return;

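        /* Fast path: copy through the CPU-visible VRAM aperture when the
         * range falls inside it; the HDP cache is flushed after writes and
         * invalidated before reads so the CPU and GPU views stay coherent.
         */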
#ifdef CONFIG_64BIT
        last = min(pos + size, adev->gmc.visible_vram_size);
        if (last > pos) {
                void __iomem *addr = adev->mman.aper_base_kaddr + pos;
                size_t count = last - pos;

                if (write) {
                        memcpy_toio(addr, buf, count);
                        mb();
                        amdgpu_device_flush_hdp(adev, NULL);
                } else {
                        amdgpu_device_invalidate_hdp(adev, NULL);
                        mb();
                        memcpy_fromio(buf, addr, count);
                }

                if (count == size)
                        goto exit;

                pos += count;
                buf += count / 4;
                size -= count;
        }
#endif

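        /* Slow path: go through the indirect MM_INDEX/MM_DATA window one
         * dword at a time, caching the high address bits in MM_INDEX_HI
         * to avoid redundant writes.
         */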
        spin_lock_irqsave(&adev->mmio_idx_lock, flags);
        for (last = pos + size; pos < last; pos += 4) {
                uint32_t tmp = pos >> 31;

                WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
                if (tmp != hi) {
                        WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
                        hi = tmp;
                }
                if (write)
                        WREG32_NO_KIQ(mmMM_DATA, *buf++);
                else
                        *buf++ = RREG32_NO_KIQ(mmMM_DATA);
        }
        spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);

#ifdef CONFIG_64BIT
exit:
#endif
        drm_dev_exit(idx);
}

/*
 * register access helper functions.
 */

/* Check if hw access should be skipped because of hotplug or device error */
bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
{
        if (adev->no_hw_access)
                return true;

#ifdef CONFIG_LOCKDEP
        /*
         * This is a bit complicated to understand, so it is worth a comment.
         * What we assert here is that the GPU reset is not running on another
         * thread in parallel.
         *
         * For this we trylock the read side of the reset semaphore; if that
         * succeeds we know that the reset is not running in parallel.
         *
         * If the trylock fails we assert that we are either already holding
         * the read side of the lock or are the reset thread itself and hold
         * the write side of the lock.
         */
        if (in_task()) {
                if (down_read_trylock(&adev->reset_sem))
                        up_read(&adev->reset_sem);
                else
                        lockdep_assert_held(&adev->reset_sem);
        }
#endif
        return false;
}

/**
 * amdgpu_device_rreg - read a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
                            uint32_t reg, uint32_t acc_flags)
{
        uint32_t ret;

        if (amdgpu_device_skip_hw_access(adev))
                return 0;

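        /* Registers inside the MMIO aperture go through the KIQ ring when
         * running as an SR-IOV VF at runtime, unless the caller asked to
         * bypass KIQ or a GPU reset is in progress; offsets beyond the
         * aperture use the indirect PCIe accessor.
         */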
        if ((reg * 4) < adev->rmmio_size) {
                if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
                    amdgpu_sriov_runtime(adev) &&
                    down_read_trylock(&adev->reset_sem)) {
                        ret = amdgpu_kiq_rreg(adev, reg);
                        up_read(&adev->reset_sem);
                } else {
                        ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
                }
        } else {
                ret = adev->pcie_rreg(adev, reg * 4);
        }

        trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);

        return ret;
}

/*
 * MMIO byte-sized register read helper
 * @offset: byte offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
        if (amdgpu_device_skip_hw_access(adev))
                return 0;

        if (offset < adev->rmmio_size)
                return (readb(adev->rmmio + offset));
        BUG();
}

/*
 * MMIO byte-sized register write helper
 * @offset: byte offset from MMIO start
 * @value: the value to be written to the register
 */

/**
 * amdgpu_mm_wreg8 - write a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
        if (amdgpu_device_skip_hw_access(adev))
                return;

        if (offset < adev->rmmio_size)
                writeb(value, adev->rmmio + offset);
        else
                BUG();
}

/**
 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_device_wreg(struct amdgpu_device *adev,
                        uint32_t reg, uint32_t v,
                        uint32_t acc_flags)
{
        if (amdgpu_device_skip_hw_access(adev))
                return;

        if ((reg * 4) < adev->rmmio_size) {
                if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
                    amdgpu_sriov_runtime(adev) &&
                    down_read_trylock(&adev->reset_sem)) {
                        amdgpu_kiq_wreg(adev, reg, v);
                        up_read(&adev->reset_sem);
                } else {
                        writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
                }
        } else {
                adev->pcie_wreg(adev, reg * 4, v);
        }

        trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
}

/*
 * amdgpu_mm_wreg_mmio_rlc - write a register either through MMIO or through
 * the RLC path if the register is within the RLC access range.
 *
 * This function is invoked only for debugfs register access.
 */
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
                             uint32_t reg, uint32_t v)
{
        if (amdgpu_device_skip_hw_access(adev))
                return;

        if (amdgpu_sriov_fullaccess(adev) &&
            adev->gfx.rlc.funcs &&
            adev->gfx.rlc.funcs->is_rlcg_access_range) {
                if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
                        return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v, 0, 0);
        } else {
                writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
        }
}

/**
 * amdgpu_mm_rdoorbell - read a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
{
        if (amdgpu_device_skip_hw_access(adev))
                return 0;

        if (index < adev->doorbell.num_doorbells) {
                return readl(adev->doorbell.ptr + index);
        } else {
                DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
                return 0;
        }
}

/**
 * amdgpu_mm_wdoorbell - write a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
{
        if (amdgpu_device_skip_hw_access(adev))
                return;

        if (index < adev->doorbell.num_doorbells) {
                writel(v, adev->doorbell.ptr + index);
        } else {
                DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
        }
}

/**
 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
{
        if (amdgpu_device_skip_hw_access(adev))
                return 0;

        if (index < adev->doorbell.num_doorbells) {
                return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
        } else {
                DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
                return 0;
        }
}

/**
 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
{
        if (amdgpu_device_skip_hw_access(adev))
                return;

        if (index < adev->doorbell.num_doorbells) {
                atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
        } else {
                DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
        }
}

/**
 * amdgpu_device_indirect_rreg - read an indirect register
 *
 * @adev: amdgpu_device pointer
 * @pcie_index: mmio register offset of the index register
 * @pcie_data: mmio register offset of the data register
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */
u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
                                u32 pcie_index, u32 pcie_data,
                                u32 reg_addr)
{
        unsigned long flags;
        u32 r;
        void __iomem *pcie_index_offset;
        void __iomem *pcie_data_offset;

        spin_lock_irqsave(&adev->pcie_idx_lock, flags);
        pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
        pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

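        /* Select the target register through the index window, read the
         * index back so the posted write has landed, then access the data
         * window.
         */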
        writel(reg_addr, pcie_index_offset);
        readl(pcie_index_offset);
        r = readl(pcie_data_offset);
        spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

        return r;
}

/**
 * amdgpu_device_indirect_rreg64 - read a 64 bit indirect register
 *
 * @adev: amdgpu_device pointer
 * @pcie_index: mmio register offset of the index register
 * @pcie_data: mmio register offset of the data register
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */
u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
                                  u32 pcie_index, u32 pcie_data,
                                  u32 reg_addr)
{
        unsigned long flags;
        u64 r;
        void __iomem *pcie_index_offset;
        void __iomem *pcie_data_offset;

        spin_lock_irqsave(&adev->pcie_idx_lock, flags);
        pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
        pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

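        /* The 64 bit value is assembled from two 32 bit window reads; the
         * pcie_idx_lock keeps the index/data sequence atomic with respect
         * to other CPU-side indirect accesses, not with respect to
         * concurrent hardware updates of the register.
         */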
        /* read low 32 bits */
        writel(reg_addr, pcie_index_offset);
        readl(pcie_index_offset);
        r = readl(pcie_data_offset);
        /* read high 32 bits */
        writel(reg_addr + 4, pcie_index_offset);
        readl(pcie_index_offset);
        r |= ((u64)readl(pcie_data_offset) << 32);
        spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

        return r;
}

/**
 * amdgpu_device_indirect_wreg - write to an indirect register
 *
 * @adev: amdgpu_device pointer
 * @pcie_index: mmio register offset of the index register
 * @pcie_data: mmio register offset of the data register
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */
void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
                                 u32 pcie_index, u32 pcie_data,
                                 u32 reg_addr, u32 reg_data)
{
        unsigned long flags;
        void __iomem *pcie_index_offset;
        void __iomem *pcie_data_offset;

        spin_lock_irqsave(&adev->pcie_idx_lock, flags);
        pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
        pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

        writel(reg_addr, pcie_index_offset);
        readl(pcie_index_offset);
        writel(reg_data, pcie_data_offset);
        readl(pcie_data_offset);
        spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

/**
 * amdgpu_device_indirect_wreg64 - write a 64 bit value to an indirect register
 *
 * @adev: amdgpu_device pointer
 * @pcie_index: mmio register offset of the index register
 * @pcie_data: mmio register offset of the data register
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */
void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
                                   u32 pcie_index, u32 pcie_data,
                                   u32 reg_addr, u64 reg_data)
{
        unsigned long flags;
        void __iomem *pcie_index_offset;
        void __iomem *pcie_data_offset;

        spin_lock_irqsave(&adev->pcie_idx_lock, flags);
        pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
        pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

        /* write low 32 bits */
        writel(reg_addr, pcie_index_offset);
        readl(pcie_index_offset);
        writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
        readl(pcie_data_offset);
        /* write high 32 bits */
        writel(reg_addr + 4, pcie_index_offset);
        readl(pcie_index_offset);
        writel((u32)(reg_data >> 32), pcie_data_offset);
        readl(pcie_data_offset);
        spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Dummy register read function.  Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
        DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
        BUG();
        return 0;
}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function.  Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
        DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
                  reg, v);
        BUG();
}

/**
 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Dummy register read function.  Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
{
        DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
        BUG();
        return 0;
}

/**
 * amdgpu_invalid_wreg64 - dummy 64 bit reg write function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function.  Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
{
        DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
                  reg, v);
        BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu_device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function.  Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
                                          uint32_t block, uint32_t reg)
{
        DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
                  reg, block);
        BUG();
        return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function.  Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
                                      uint32_t block,
                                      uint32_t reg, uint32_t v)
{
        DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
                  reg, block, v);
        BUG();
}

/**
 * amdgpu_device_asic_init - Wrapper for atom asic_init
 *
 * @adev: amdgpu_device pointer
 *
 * Does any asic specific work and then calls atom asic init.
 */
static int amdgpu_device_asic_init(struct amdgpu_device *adev)
{
        amdgpu_asic_pre_asic_init(adev);

        return amdgpu_atom_asic_init(adev->mode_info.atom_context);
}

/**
 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu_device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
 */
static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
{
        return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
                                       &adev->vram_scratch.robj,
                                       &adev->vram_scratch.gpu_addr,
                                       (void **)&adev->vram_scratch.ptr);
}

/**
 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu_device pointer
 *
 * Frees the VRAM scratch page.
 */
static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
{
        amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
}

/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with AND and OR masks.
 * This is a helper for setting golden registers.
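 *
 * The array is consumed as {offset, and_mask, or_mask} triplets. A
 * hypothetical golden-register table (register name and values are
 * illustrative only):
 *
 *   static const u32 golden[] = {
 *           mmFOO_CNTL, 0x0000000f, 0x00000004,
 *   };
 *   amdgpu_device_program_register_sequence(adev, golden,
 *                                           ARRAY_SIZE(golden));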
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
                                             const u32 *registers,
                                             const u32 array_size)
{
        u32 tmp, reg, and_mask, or_mask;
        int i;

        if (array_size % 3)
                return;

        for (i = 0; i < array_size; i += 3) {
                reg = registers[i + 0];
                and_mask = registers[i + 1];
                or_mask = registers[i + 2];

                if (and_mask == 0xffffffff) {
                        tmp = or_mask;
                } else {
                        tmp = RREG32(reg);
                        tmp &= ~and_mask;
                        if (adev->family >= AMDGPU_FAMILY_AI)
                                tmp |= (or_mask & and_mask);
                        else
                                tmp |= or_mask;
                }
                WREG32(reg, tmp);
        }
}

/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
        pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}

/**
 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
 */
int amdgpu_device_pci_reset(struct amdgpu_device *adev)
{
        return pci_reset_function(adev->pdev);
}

/*
 * GPU doorbell aperture helper functions.
 */
/**
 * amdgpu_device_doorbell_init - Init doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Init doorbell driver information (CIK)
 * Returns 0 on success, error on failure.
 */
static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
{
        /* No doorbell on SI hardware generation */
        if (adev->asic_type < CHIP_BONAIRE) {
                adev->doorbell.base = 0;
                adev->doorbell.size = 0;
                adev->doorbell.num_doorbells = 0;
                adev->doorbell.ptr = NULL;
                return 0;
        }

        if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
                return -EINVAL;

        amdgpu_asic_init_doorbell_index(adev);

        /* doorbell bar mapping */
        adev->doorbell.base = pci_resource_start(adev->pdev, 2);
        adev->doorbell.size = pci_resource_len(adev->pdev, 2);

        adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
                                             adev->doorbell_index.max_assignment + 1);
        if (adev->doorbell.num_doorbells == 0)
                return -EINVAL;

        /* For Vega, reserve and map two pages on the doorbell BAR since the
         * SDMA paging queue doorbell uses the second page. The
         * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
         * doorbells are in the first page, so with the paging queue enabled
         * num_doorbells is extended by one page (0x400 dwords).
         */
        if (adev->asic_type >= CHIP_VEGA10)
                adev->doorbell.num_doorbells += 0x400;

        adev->doorbell.ptr = ioremap(adev->doorbell.base,
                                     adev->doorbell.num_doorbells *
                                     sizeof(u32));
        if (adev->doorbell.ptr == NULL)
                return -ENOMEM;

        return 0;
}

/**
 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down doorbell driver information (CIK)
 */
static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
{
        iounmap(adev->doorbell.ptr);
        adev->doorbell.ptr = NULL;
}

/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */

/**
 * amdgpu_device_wb_fini - Disable Writeback and free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Disables Writeback and frees the Writeback memory (all asics).
 * Used at driver shutdown.
 */
static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
{
        if (adev->wb.wb_obj) {
                amdgpu_bo_free_kernel(&adev->wb.wb_obj,
                                      &adev->wb.gpu_addr,
                                      (void **)&adev->wb.wb);
                adev->wb.wb_obj = NULL;
        }
}

/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or a negative error code on failure.
 */
static int amdgpu_device_wb_init(struct amdgpu_device *adev)
{
        int r;

        if (adev->wb.wb_obj == NULL) {
                /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256 bit slots */
                r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
                                            PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
                                            &adev->wb.wb_obj, &adev->wb.gpu_addr,
                                            (void **)&adev->wb.wb);
                if (r) {
                        dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
                        return r;
                }

                adev->wb.num_wb = AMDGPU_MAX_WB;
                memset(&adev->wb.used, 0, sizeof(adev->wb.used));

                /* clear wb memory */
                memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
        }

        return 0;
}

/**
 * amdgpu_device_wb_get - Allocate a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Allocate a wb slot for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
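 *
 * Each writeback slot is 256 bits (8 dwords) wide; the returned index is a
 * dword offset, hence the shift by 3 here and in amdgpu_device_wb_free().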
 */
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
        unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);

        if (offset < adev->wb.num_wb) {
                __set_bit(offset, adev->wb.used);
                *wb = offset << 3; /* convert to dw offset */
                return 0;
        } else {
                return -EINVAL;
        }
}

/**
 * amdgpu_device_wb_free - Free a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Free a wb slot allocated for use by the driver (all asics)
 */
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
        wb >>= 3;
        if (wb < adev->wb.num_wb)
                __clear_bit(wb, adev->wb.used);
}

/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the resize we abort
 * driver loading by returning -ENODEV.
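 *
 * BAR sizes here use the PCI resizable-BAR encoding shared by
 * pci_rebar_bytes_to_size() and pci_rebar_get_possible_sizes():
 * log2(bytes) - 20, i.e. 0 = 1MB, 8 = 256MB, 13 = 8GB.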
 */
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
{
        int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
        struct pci_bus *root;
        struct resource *res;
        unsigned i;
        u16 cmd;
        int r;

        /* Bypass for VF */
        if (amdgpu_sriov_vf(adev))
                return 0;

        /* skip if the bios has already enabled large BAR */
        if (adev->gmc.real_vram_size &&
            (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
                return 0;

        /* Check if the root BUS has 64bit memory resources */
        root = adev->pdev->bus;
        while (root->parent)
                root = root->parent;

        pci_bus_for_each_resource(root, res, i) {
                if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
                    res->start > 0x100000000ull)
                        break;
        }

        /* Trying to resize is pointless without a root hub window above 4GB */
        if (!res)
                return 0;

        /* Limit the BAR size to what is available */
        rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
                        rbar_size);

        /* Disable memory decoding while we change the BAR addresses and size */
        pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
        pci_write_config_word(adev->pdev, PCI_COMMAND,
                              cmd & ~PCI_COMMAND_MEMORY);

        /* Free the VRAM and doorbell BAR, we most likely need to move both. */
        amdgpu_device_doorbell_fini(adev);
        if (adev->asic_type >= CHIP_BONAIRE)
                pci_release_resource(adev->pdev, 2);

        pci_release_resource(adev->pdev, 0);

        r = pci_resize_resource(adev->pdev, 0, rbar_size);
        if (r == -ENOSPC)
                DRM_INFO("Not enough PCI address space for a large BAR.");
        else if (r && r != -ENOTSUPP)
                DRM_ERROR("Problem resizing BAR0 (%d).", r);

        pci_assign_unassigned_bus_resources(adev->pdev->bus);

        /* When the doorbell or fb BAR isn't available we have no chance of
         * using the device.
         */
        r = amdgpu_device_doorbell_init(adev);
        if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
                return -ENODEV;

        pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);

        return 0;
}

/*
 * GPU helper functions.
 */
/**
 * amdgpu_device_need_post - check if the hw need post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup,
 * or if post is needed after a hw reset is performed.
 * Returns true if post is needed, false if not.
 */
bool amdgpu_device_need_post(struct amdgpu_device *adev)
{
        uint32_t reg;

        if (amdgpu_sriov_vf(adev))
                return false;

        if (amdgpu_passthrough(adev)) {
                /* For FIJI: in the whole-GPU pass-through virtualization
                 * case, after a VM reboot some old SMC firmware still needs
                 * the driver to do a vPost, otherwise the GPU hangs. SMC
                 * firmware versions above 22.15 don't have this flaw, so
                 * force vPost for versions below 22.15.
                 */
                if (adev->asic_type == CHIP_FIJI) {
                        int err;
                        uint32_t fw_ver;

                        err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
                        /* force vPost if an error occurred */
                        if (err)
                                return true;

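                        /* the SMC firmware version is stored at dword
                         * offset 69 of the firmware image */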
                        fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
                        if (fw_ver < 0x00160e00)
                                return true;
                }
        }

        /* Don't post if we need to reset whole hive on init */
        if (adev->gmc.xgmi.pending_reset)
                return false;

        if (adev->has_hw_reset) {
                adev->has_hw_reset = false;
                return true;
        }

        /* bios scratch used on CIK+ */
        if (adev->asic_type >= CHIP_BONAIRE)
                return amdgpu_atombios_scratch_need_asic_init(adev);

        /* check MEM_SIZE for older asics */
        reg = amdgpu_asic_get_config_memsize(adev);

        if ((reg != 0) && (reg != 0xffffffff))
                return false;

        return true;
}

/* if we get transitioned to only one device, take VGA back */
/**
 * amdgpu_device_vga_set_decode - enable/disable vga decode
 *
 * @cookie: amdgpu_device pointer
 * @state: enable/disable vga decode
 *
 * Enable/disable vga decode (all asics).
 * Returns VGA resource flags.
 */
static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
{
        struct amdgpu_device *adev = cookie;

        amdgpu_asic_set_vga_state(adev, state);
        if (state)
                return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
                       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
        else
                return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
}

/**
 * amdgpu_device_check_block_size - validate the vm block size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm block size specified via module parameter.
 * The vm block size defines the number of bits in the page table versus the
 * page directory: a page is 4KB, so we have a 12 bit offset, a minimum of 9
 * bits in the page table, and the remaining bits in the page directory.
 */
static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
{
        /* defines number of bits in page table versus page directory,
         * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
         * page table and the remaining bits are in the page directory */
        if (amdgpu_vm_block_size == -1)
                return;

        if (amdgpu_vm_block_size < 9) {
                dev_warn(adev->dev, "VM page table size (%d) too small\n",
                         amdgpu_vm_block_size);
                amdgpu_vm_block_size = -1;
        }
}

/**
 * amdgpu_device_check_vm_size - validate the vm size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm size in GB specified via module parameter.
 * The VM size is the size of the GPU virtual memory space in GB.
 */
static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
{
        /* no need to check the default value */
        if (amdgpu_vm_size == -1)
                return;

        if (amdgpu_vm_size < 1) {
                dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
                         amdgpu_vm_size);
                amdgpu_vm_size = -1;
        }
}

static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
{
        struct sysinfo si;
        bool is_os_64 = (sizeof(void *) == 8);
        uint64_t total_memory;
        uint64_t dram_size_seven_GB = 0x1B8000000;
        uint64_t dram_size_three_GB = 0xB8000000;

        if (amdgpu_smu_memory_pool_size == 0)
                return;

        if (!is_os_64) {
                DRM_WARN("Not 64-bit OS, feature not supported\n");
                goto def_value;
        }
        si_meminfo(&si);
        total_memory = (uint64_t)si.totalram * si.mem_unit;

        if ((amdgpu_smu_memory_pool_size == 1) ||
            (amdgpu_smu_memory_pool_size == 2)) {
                if (total_memory < dram_size_three_GB)
                        goto def_value1;
        } else if ((amdgpu_smu_memory_pool_size == 4) ||
                   (amdgpu_smu_memory_pool_size == 8)) {
                if (total_memory < dram_size_seven_GB)
                        goto def_value1;
        } else {
                DRM_WARN("Smu memory pool size not supported\n");
                goto def_value;
        }
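        /* the module parameter is in units of 256MB (1 << 28 bytes) */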
        adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;

        return;

def_value1:
        DRM_WARN("Not enough system memory\n");
def_value:
        adev->pm.smu_prv_buffer_size = 0;
}

static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
{
        if (!(adev->flags & AMD_IS_APU) ||
            adev->asic_type < CHIP_RAVEN)
                return 0;

        switch (adev->asic_type) {
        case CHIP_RAVEN:
                if (adev->pdev->device == 0x15dd)
                        adev->apu_flags |= AMD_APU_IS_RAVEN;
                if (adev->pdev->device == 0x15d8)
                        adev->apu_flags |= AMD_APU_IS_PICASSO;
                break;
        case CHIP_RENOIR:
                if ((adev->pdev->device == 0x1636) ||
                    (adev->pdev->device == 0x164c))
                        adev->apu_flags |= AMD_APU_IS_RENOIR;
                else
                        adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
                break;
        case CHIP_VANGOGH:
                adev->apu_flags |= AMD_APU_IS_VANGOGH;
                break;
        case CHIP_YELLOW_CARP:
                break;
        default:
                return -EINVAL;
        }

        return 0;
}

/**
 * amdgpu_device_check_arguments - validate module params
 *
 * @adev: amdgpu_device pointer
 *
 * Validates certain module parameters and updates
 * the associated values used by the driver (all asics).
 */
static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
        if (amdgpu_sched_jobs < 4) {
                dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
                         amdgpu_sched_jobs);
                amdgpu_sched_jobs = 4;
        } else if (!is_power_of_2(amdgpu_sched_jobs)) {
                dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
                         amdgpu_sched_jobs);
                amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
        }

        if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
                /* gart size must be greater or equal to 32M */
                dev_warn(adev->dev, "gart size (%d) too small\n",
                         amdgpu_gart_size);
                amdgpu_gart_size = -1;
        }

        if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
                /* gtt size must be greater or equal to 32M */
                dev_warn(adev->dev, "gtt size (%d) too small\n",
                         amdgpu_gtt_size);
                amdgpu_gtt_size = -1;
        }

        /* valid range is between 4 and 9 inclusive */
        if (amdgpu_vm_fragment_size != -1 &&
            (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
                dev_warn(adev->dev, "valid range is between 4 and 9\n");
                amdgpu_vm_fragment_size = -1;
        }

        if (amdgpu_sched_hw_submission < 2) {
                dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
                         amdgpu_sched_hw_submission);
                amdgpu_sched_hw_submission = 2;
        } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
                dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
                         amdgpu_sched_hw_submission);
                amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
        }

        amdgpu_device_check_smu_prv_buffer_size(adev);

        amdgpu_device_check_vm_size(adev);

        amdgpu_device_check_block_size(adev);

        adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);

        amdgpu_gmc_tmz_set(adev);

        amdgpu_gmc_noretry_set(adev);

        return 0;
}

/**
 * amdgpu_switcheroo_set_state - set switcheroo state
 *
 * @pdev: pci dev pointer
 * @state: vga_switcheroo state
 *
 * Callback for the switcheroo driver.  Suspends or resumes
 * the asics before or after it is powered up using ACPI methods.
 */
static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
                                        enum vga_switcheroo_state state)
{
        struct drm_device *dev = pci_get_drvdata(pdev);
        int r;

        if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
                return;

        if (state == VGA_SWITCHEROO_ON) {
                pr_info("switched on\n");
                /* don't suspend or resume card normally */
                dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;

                pci_set_power_state(pdev, PCI_D0);
                amdgpu_device_load_pci_state(pdev);
                r = pci_enable_device(pdev);
                if (r)
                        DRM_WARN("pci_enable_device failed (%d)\n", r);
                amdgpu_device_resume(dev, true);

                dev->switch_power_state = DRM_SWITCH_POWER_ON;
        } else {
                pr_info("switched off\n");
                dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
                amdgpu_device_suspend(dev, true);
                amdgpu_device_cache_pci_state(pdev);
                /* Shut down the device */
                pci_disable_device(pdev);
                pci_set_power_state(pdev, PCI_D3cold);
                dev->switch_power_state = DRM_SWITCH_POWER_OFF;
        }
}

/**
 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
 *
 * @pdev: pci dev pointer
 *
 * Callback for the switcheroo driver.  Check if the switcheroo
 * state can be changed.
 * Returns true if the state can be changed, false if not.
 */
static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
{
        struct drm_device *dev = pci_get_drvdata(pdev);

        /*
         * FIXME: open_count is protected by drm_global_mutex but that would lead to
         * locking inversion with the driver load path. And the access here is
         * completely racy anyway. So don't bother with locking for now.
         */
        return atomic_read(&dev->open_count) == 0;
}

static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
        .set_gpu_state = amdgpu_switcheroo_set_state,
        .reprobe = NULL,
        .can_switch = amdgpu_switcheroo_can_switch,
};

/**
 * amdgpu_device_ip_set_clockgating_state - set the CG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: clockgating state (gate or ungate)
 *
 * Sets the requested clockgating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
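 *
 * An illustrative call, e.g. to gate GFX clocks:
 *
 *   amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
 *                                          AMD_CG_STATE_GATE);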
 */
int amdgpu_device_ip_set_clockgating_state(void *dev,
                                           enum amd_ip_block_type block_type,
                                           enum amd_clockgating_state state)
{
        struct amdgpu_device *adev = dev;
        int i, r = 0;

        for (i = 0; i < adev->num_ip_blocks; i++) {
                if (!adev->ip_blocks[i].status.valid)
                        continue;
                if (adev->ip_blocks[i].version->type != block_type)
                        continue;
                if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
                        continue;
                r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
                        (void *)adev, state);
                if (r)
                        DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
                                  adev->ip_blocks[i].version->funcs->name, r);
        }
        return r;
}
1573
1574/**
1575 * amdgpu_device_ip_set_powergating_state - set the PG state
1576 *
1577 * @dev: amdgpu_device pointer
1578 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1579 * @state: powergating state (gate or ungate)
1580 *
1581 * Sets the requested powergating state for all instances of
1582 * the hardware IP specified.
1583 * Returns the error code from the last instance.
1584 */
1585int amdgpu_device_ip_set_powergating_state(void *dev,
1586                                           enum amd_ip_block_type block_type,
1587                                           enum amd_powergating_state state)
1588{
1589        struct amdgpu_device *adev = dev;
1590        int i, r = 0;
1591
1592        for (i = 0; i < adev->num_ip_blocks; i++) {
1593                if (!adev->ip_blocks[i].status.valid)
1594                        continue;
1595                if (adev->ip_blocks[i].version->type != block_type)
1596                        continue;
1597                if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1598                        continue;
1599                r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1600                        (void *)adev, state);
1601                if (r)
1602                        DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1603                                  adev->ip_blocks[i].version->funcs->name, r);
1604        }
1605        return r;
1606}
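
/*
 * Usage sketch (illustrative, not from the original source): ungating VCN
 * power before video work might look like this:
 *
 *     r = amdgpu_device_ip_set_powergating_state(adev,
 *                                                AMD_IP_BLOCK_TYPE_VCN,
 *                                                AMD_PG_STATE_UNGATE);
 *     if (r)
 *         DRM_ERROR("failed to ungate VCN (%d)\n", r);
 */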
1607
1608/**
1609 * amdgpu_device_ip_get_clockgating_state - get the CG state
1610 *
1611 * @adev: amdgpu_device pointer
1612 * @flags: clockgating feature flags
1613 *
1614 * Walks the list of IPs on the device and updates the clockgating
1615 * flags for each IP.
1616 * Updates @flags with the feature flags for each hardware IP where
1617 * clockgating is enabled.
1618 */
1619void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1620                                            u32 *flags)
1621{
1622        int i;
1623
1624        for (i = 0; i < adev->num_ip_blocks; i++) {
1625                if (!adev->ip_blocks[i].status.valid)
1626                        continue;
1627                if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1628                        adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1629        }
1630}
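
/*
 * Usage sketch (illustrative): callers pass a zeroed flags word and then
 * test the AMD_CG_SUPPORT_* bits the IPs have set; the specific bit below
 * is just one example:
 *
 *     u32 flags = 0;
 *
 *     amdgpu_device_ip_get_clockgating_state(adev, &flags);
 *     if (flags & AMD_CG_SUPPORT_GFX_MGCG)
 *         DRM_INFO("GFX medium grain clockgating is enabled\n");
 */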
1631
1632/**
1633 * amdgpu_device_ip_wait_for_idle - wait for idle
1634 *
1635 * @adev: amdgpu_device pointer
1636 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1637 *
1638 * Waits for the requested hardware IP to be idle.
1639 * Returns 0 for success or a negative error code on failure.
1640 */
1641int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1642                                   enum amd_ip_block_type block_type)
1643{
1644        int i, r;
1645
1646        for (i = 0; i < adev->num_ip_blocks; i++) {
1647                if (!adev->ip_blocks[i].status.valid)
1648                        continue;
1649                if (adev->ip_blocks[i].version->type == block_type) {
1650                        r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
1651                        if (r)
1652                                return r;
1653                        break;
1654                }
1655        }
1656        return 0;
1658}
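
/*
 * Usage sketch (illustrative): waiting for the GMC to go idle before
 * touching its state might look like this:
 *
 *     r = amdgpu_device_ip_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GMC);
 *     if (r)
 *         return r;
 */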
1659
1660/**
1661 * amdgpu_device_ip_is_idle - is the hardware IP idle
1662 *
1663 * @adev: amdgpu_device pointer
1664 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1665 *
1666 * Check if the hardware IP is idle or not.
1667 * Returns true if the IP is idle, false if not.
1668 */
1669bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1670                              enum amd_ip_block_type block_type)
1671{
1672        int i;
1673
1674        for (i = 0; i < adev->num_ip_blocks; i++) {
1675                if (!adev->ip_blocks[i].status.valid)
1676                        continue;
1677                if (adev->ip_blocks[i].version->type == block_type)
1678                        return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
1679        }
1680        return true;
1682}
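
/*
 * Usage sketch (illustrative): a non-blocking busy check, falling back to
 * the blocking wait above only when needed:
 *
 *     if (!amdgpu_device_ip_is_idle(adev, AMD_IP_BLOCK_TYPE_GFX))
 *         r = amdgpu_device_ip_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GFX);
 */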
1683
1684/**
1685 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1686 *
1687 * @adev: amdgpu_device pointer
1688 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
1689 *
1690 * Returns a pointer to the hardware IP block structure
1691 * if it exists for the asic, otherwise NULL.
1692 */
1693struct amdgpu_ip_block *
1694amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1695                              enum amd_ip_block_type type)
1696{
1697        int i;
1698
1699        for (i = 0; i < adev->num_ip_blocks; i++)
1700                if (adev->ip_blocks[i].version->type == type)
1701                        return &adev->ip_blocks[i];
1702
1703        return NULL;
1704}
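
/*
 * Usage sketch (illustrative): looking up the GFX block to inspect its
 * version numbers:
 *
 *     struct amdgpu_ip_block *ip_block;
 *
 *     ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
 *     if (ip_block)
 *         DRM_INFO("GFX IP v%d.%d\n", ip_block->version->major,
 *                  ip_block->version->minor);
 */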
1705
1706/**
1707 * amdgpu_device_ip_block_version_cmp - check an IP block's version
1708 *
1709 * @adev: amdgpu_device pointer
1710 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
1711 * @major: major version
1712 * @minor: minor version
1713 *
1714 * Returns 0 if the IP block's version is equal to or greater than the
1715 * requested version, 1 if it is smaller or the IP block doesn't exist.
1716 */
1717int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1718                                       enum amd_ip_block_type type,
1719                                       u32 major, u32 minor)
1720{
1721        struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
1722
1723        if (ip_block && ((ip_block->version->major > major) ||
1724                        ((ip_block->version->major == major) &&
1725                        (ip_block->version->minor >= minor))))
1726                return 0;
1727
1728        return 1;
1729}
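
/*
 * Usage sketch (illustrative): gating a code path on a minimum IP version.
 * Note the inverted convention: 0 means "equal or greater".  The helper
 * called below is hypothetical:
 *
 *     if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX,
 *                                             9, 0))
 *         enable_gfx9_plus_feature(adev);
 */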
1730
1731/**
1732 * amdgpu_device_ip_block_add - add an IP block to the device
1733 *
1734 * @adev: amdgpu_device pointer
1735 * @ip_block_version: pointer to the IP to add
1736 *
1737 * Adds the IP block driver information to the collection of IPs
1738 * on the asic.  Returns 0 on success, -EINVAL on failure.
1739 */
1740int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1741                               const struct amdgpu_ip_block_version *ip_block_version)
1742{
1743        if (!ip_block_version)
1744                return -EINVAL;
1745
1746        switch (ip_block_version->type) {
1747        case AMD_IP_BLOCK_TYPE_VCN:
1748                if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
1749                        return 0;
1750                break;
1751        case AMD_IP_BLOCK_TYPE_JPEG:
1752                if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
1753                        return 0;
1754                break;
1755        default:
1756                break;
1757        }
1758
1759        DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
1760                  ip_block_version->funcs->name);
1761
1762        adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1763
1764        return 0;
1765}
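
/*
 * Usage sketch (illustrative): asic setup code such as the *_set_ip_blocks
 * helpers registers each IP in turn, e.g. with an amdgpu_ip_block_version
 * instance like gmc_v9_0_ip_block:
 *
 *     r = amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
 *     if (r)
 *         return r;
 */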
1766
1767/**
1768 * amdgpu_device_enable_virtual_display - enable virtual display feature
1769 *
1770 * @adev: amdgpu_device pointer
1771 *
1772 * Enables the virtual display feature if the user has enabled it via
1773 * the module parameter virtual_display.  This feature provides virtual
1774 * display hardware on headless boards or in virtualized environments.
1775 * This function parses and validates the configuration string specified by
1776 * the user and applies the virtual display configuration (number of
1777 * virtual connectors, crtcs, etc.) specified.
1778 */
1779static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
1780{
1781        adev->enable_virtual_display = false;
1782
1783        if (amdgpu_virtual_display) {
1784                const char *pci_address_name = pci_name(adev->pdev);
1785                char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
1786
1787                pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1788                pciaddstr_tmp = pciaddstr;
1789                while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1790                        pciaddname = strsep(&pciaddname_tmp, ",");
1791                        if (!strcmp("all", pciaddname) ||
1792                            !strcmp(pci_address_name, pciaddname)) {
1793                                long num_crtc;
1794                                int res = -1;
1795
1796                                adev->enable_virtual_display = true;
1797
1798                                if (pciaddname_tmp)
1799                                        res = kstrtol(pciaddname_tmp, 10,
1800                                                      &num_crtc);
1801
1802                                if (!res) {
1803                                        if (num_crtc < 1)
1804                                                num_crtc = 1;
1805                                        if (num_crtc > 6)
1806                                                num_crtc = 6;
1807                                        adev->mode_info.num_crtc = num_crtc;
1808                                } else {
1809                                        adev->mode_info.num_crtc = 1;
1810                                }
1811                                break;
1812                        }
1813                }
1814
1815                DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1816                         amdgpu_virtual_display, pci_address_name,
1817                         adev->enable_virtual_display, adev->mode_info.num_crtc);
1818
1819                kfree(pciaddstr);
1820        }
1821}
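
/*
 * Illustrative examples of the option format parsed above: entries are
 * separated by ';', each entry is "<pci address>[,<num_crtc>]", the string
 * "all" matches every device, and num_crtc is clamped to 1-6 (defaulting
 * to 1).  The PCI address below is hypothetical:
 *
 *     amdgpu.virtual_display=0000:01:00.0,2
 *     amdgpu.virtual_display=all,1
 */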
1822
1823/**
1824 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1825 *
1826 * @adev: amdgpu_device pointer
1827 *
1828 * Parses the asic configuration parameters specified in the gpu info
1829 * firmware and makes them available to the driver for use in configuring
1830 * the asic.
1831 * Returns 0 on success, -EINVAL on failure.
1832 */
1833static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1834{
1835        const char *chip_name;
1836        char fw_name[40];
1837        int err;
1838        const struct gpu_info_firmware_header_v1_0 *hdr;
1839
1840        adev->firmware.gpu_info_fw = NULL;
1841
1842        if (adev->mman.discovery_bin) {
1843                amdgpu_discovery_get_gfx_info(adev);
1844
1845                /*
1846                 * FIXME: The bounding box is still needed by Navi12, so
1847                 * temporarily read it from gpu_info firmware. Should be dropped
1848                 * when DAL no longer needs it.
1849                 */
1850                if (adev->asic_type != CHIP_NAVI12)
1851                        return 0;
1852        }
1853
1854        switch (adev->asic_type) {
1855#ifdef CONFIG_DRM_AMDGPU_SI
1856        case CHIP_VERDE:
1857        case CHIP_TAHITI:
1858        case CHIP_PITCAIRN:
1859        case CHIP_OLAND:
1860        case CHIP_HAINAN:
1861#endif
1862#ifdef CONFIG_DRM_AMDGPU_CIK
1863        case CHIP_BONAIRE:
1864        case CHIP_HAWAII:
1865        case CHIP_KAVERI:
1866        case CHIP_KABINI:
1867        case CHIP_MULLINS:
1868#endif
1869        case CHIP_TOPAZ:
1870        case CHIP_TONGA:
1871        case CHIP_FIJI:
1872        case CHIP_POLARIS10:
1873        case CHIP_POLARIS11:
1874        case CHIP_POLARIS12:
1875        case CHIP_VEGAM:
1876        case CHIP_CARRIZO:
1877        case CHIP_STONEY:
1878        case CHIP_VEGA20:
1879        case CHIP_ALDEBARAN:
1880        case CHIP_SIENNA_CICHLID:
1881        case CHIP_NAVY_FLOUNDER:
1882        case CHIP_DIMGREY_CAVEFISH:
1883        case CHIP_BEIGE_GOBY:
1884        default:
1885                return 0;
1886        case CHIP_VEGA10:
1887                chip_name = "vega10";
1888                break;
1889        case CHIP_VEGA12:
1890                chip_name = "vega12";
1891                break;
1892        case CHIP_RAVEN:
1893                if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1894                        chip_name = "raven2";
1895                else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1896                        chip_name = "picasso";
1897                else
1898                        chip_name = "raven";
1899                break;
1900        case CHIP_ARCTURUS:
1901                chip_name = "arcturus";
1902                break;
1903        case CHIP_RENOIR:
1904                if (adev->apu_flags & AMD_APU_IS_RENOIR)
1905                        chip_name = "renoir";
1906                else
1907                        chip_name = "green_sardine";
1908                break;
1909        case CHIP_NAVI10:
1910                chip_name = "navi10";
1911                break;
1912        case CHIP_NAVI14:
1913                chip_name = "navi14";
1914                break;
1915        case CHIP_NAVI12:
1916                chip_name = "navi12";
1917                break;
1918        case CHIP_VANGOGH:
1919                chip_name = "vangogh";
1920                break;
1921        case CHIP_YELLOW_CARP:
1922                chip_name = "yellow_carp";
1923                break;
1924        }
1925
1926        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
1927        err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
1928        if (err) {
1929                dev_err(adev->dev,
1930                        "Failed to load gpu_info firmware \"%s\"\n",
1931                        fw_name);
1932                goto out;
1933        }
1934        err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
1935        if (err) {
1936                dev_err(adev->dev,
1937                        "Failed to validate gpu_info firmware \"%s\"\n",
1938                        fw_name);
1939                goto out;
1940        }
1941
1942        hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
1943        amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1944
1945        switch (hdr->version_major) {
1946        case 1:
1947        {
1948                const struct gpu_info_firmware_v1_0 *gpu_info_fw =
1949                        (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
1950                                                                le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1951
1952                /*
1953                 * Should be dropped when DAL no longer needs it.
1954                 */
1955                if (adev->asic_type == CHIP_NAVI12)
1956                        goto parse_soc_bounding_box;
1957
1958                adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1959                adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1960                adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1961                adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
1962                adev->gfx.config.max_texture_channel_caches =
1963                        le32_to_cpu(gpu_info_fw->gc_num_tccs);
1964                adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1965                adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1966                adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1967                adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
1968                adev->gfx.config.double_offchip_lds_buf =
1969                        le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1970                adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
1971                adev->gfx.cu_info.max_waves_per_simd =
1972                        le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1973                adev->gfx.cu_info.max_scratch_slots_per_cu =
1974                        le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1975                adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
1976                if (hdr->version_minor >= 1) {
1977                        const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1978                                (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1979                                                                        le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1980                        adev->gfx.config.num_sc_per_sh =
1981                                le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1982                        adev->gfx.config.num_packer_per_sc =
1983                                le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1984                }
1985
1986parse_soc_bounding_box:
1987                /*
1988                 * soc bounding box info is not integrated in the discovery table,
1989                 * so we always need to parse it from the gpu_info firmware when needed.
1990                 */
1991                if (hdr->version_minor == 2) {
1992                        const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1993                                (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1994                                                                        le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1995                        adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1996                }
1997                break;
1998        }
1999        default:
2000                dev_err(adev->dev,
2001                        "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2002                err = -EINVAL;
2003                goto out;
2004        }
2005out:
2006        return err;
2007}
2008
2009/**
2010 * amdgpu_device_ip_early_init - run early init for hardware IPs
2011 *
2012 * @adev: amdgpu_device pointer
2013 *
2014 * Early initialization pass for hardware IPs.  The hardware IPs that make
2015 * up each asic are discovered and each IP's early_init callback is run.  This
2016 * is the first stage in initializing the asic.
2017 * Returns 0 on success, negative error code on failure.
2018 */
2019static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
2020{
2021        int i, r;
2022
2023        amdgpu_device_enable_virtual_display(adev);
2024
2025        if (amdgpu_sriov_vf(adev)) {
2026                r = amdgpu_virt_request_full_gpu(adev, true);
2027                if (r)
2028                        return r;
2029        }
2030
2031        switch (adev->asic_type) {
2032#ifdef CONFIG_DRM_AMDGPU_SI
2033        case CHIP_VERDE:
2034        case CHIP_TAHITI:
2035        case CHIP_PITCAIRN:
2036        case CHIP_OLAND:
2037        case CHIP_HAINAN:
2038                adev->family = AMDGPU_FAMILY_SI;
2039                r = si_set_ip_blocks(adev);
2040                if (r)
2041                        return r;
2042                break;
2043#endif
2044#ifdef CONFIG_DRM_AMDGPU_CIK
2045        case CHIP_BONAIRE:
2046        case CHIP_HAWAII:
2047        case CHIP_KAVERI:
2048        case CHIP_KABINI:
2049        case CHIP_MULLINS:
2050                if (adev->flags & AMD_IS_APU)
2051                        adev->family = AMDGPU_FAMILY_KV;
2052                else
2053                        adev->family = AMDGPU_FAMILY_CI;
2054
2055                r = cik_set_ip_blocks(adev);
2056                if (r)
2057                        return r;
2058                break;
2059#endif
2060        case CHIP_TOPAZ:
2061        case CHIP_TONGA:
2062        case CHIP_FIJI:
2063        case CHIP_POLARIS10:
2064        case CHIP_POLARIS11:
2065        case CHIP_POLARIS12:
2066        case CHIP_VEGAM:
2067        case CHIP_CARRIZO:
2068        case CHIP_STONEY:
2069                if (adev->flags & AMD_IS_APU)
2070                        adev->family = AMDGPU_FAMILY_CZ;
2071                else
2072                        adev->family = AMDGPU_FAMILY_VI;
2073
2074                r = vi_set_ip_blocks(adev);
2075                if (r)
2076                        return r;
2077                break;
2078        case CHIP_VEGA10:
2079        case CHIP_VEGA12:
2080        case CHIP_VEGA20:
2081        case CHIP_RAVEN:
2082        case CHIP_ARCTURUS:
2083        case CHIP_RENOIR:
2084        case CHIP_ALDEBARAN:
2085                if (adev->flags & AMD_IS_APU)
2086                        adev->family = AMDGPU_FAMILY_RV;
2087                else
2088                        adev->family = AMDGPU_FAMILY_AI;
2089
2090                r = soc15_set_ip_blocks(adev);
2091                if (r)
2092                        return r;
2093                break;
2094        case  CHIP_NAVI10:
2095        case  CHIP_NAVI14:
2096        case  CHIP_NAVI12:
2097        case  CHIP_SIENNA_CICHLID:
2098        case  CHIP_NAVY_FLOUNDER:
2099        case  CHIP_DIMGREY_CAVEFISH:
2100        case  CHIP_BEIGE_GOBY:
2101        case CHIP_VANGOGH:
2102        case CHIP_YELLOW_CARP:
2103                if (adev->asic_type == CHIP_VANGOGH)
2104                        adev->family = AMDGPU_FAMILY_VGH;
2105                else if (adev->asic_type == CHIP_YELLOW_CARP)
2106                        adev->family = AMDGPU_FAMILY_YC;
2107                else
2108                        adev->family = AMDGPU_FAMILY_NV;
2109
2110                r = nv_set_ip_blocks(adev);
2111                if (r)
2112                        return r;
2113                break;
2114        default:
2115                /* FIXME: not supported yet */
2116                return -EINVAL;
2117        }
2118
2119        amdgpu_amdkfd_device_probe(adev);
2120
2121        adev->pm.pp_feature = amdgpu_pp_feature_mask;
2122        if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2123                adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2124        if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2125                adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2126
2127        for (i = 0; i < adev->num_ip_blocks; i++) {
2128                if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2129                        DRM_ERROR("disabled ip block: %d <%s>\n",
2130                                  i, adev->ip_blocks[i].version->funcs->name);
2131                        adev->ip_blocks[i].status.valid = false;
2132                } else {
2133                        if (adev->ip_blocks[i].version->funcs->early_init) {
2134                                r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2135                                if (r == -ENOENT) {
2136                                        adev->ip_blocks[i].status.valid = false;
2137                                } else if (r) {
2138                                        DRM_ERROR("early_init of IP block <%s> failed %d\n",
2139                                                  adev->ip_blocks[i].version->funcs->name, r);
2140                                        return r;
2141                                } else {
2142                                        adev->ip_blocks[i].status.valid = true;
2143                                }
2144                        } else {
2145                                adev->ip_blocks[i].status.valid = true;
2146                        }
2147                }
2148                /* get the vbios after the asic_funcs are set up */
2149                if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2150                        r = amdgpu_device_parse_gpu_info_fw(adev);
2151                        if (r)
2152                                return r;
2153
2154                        /* Read BIOS */
2155                        if (!amdgpu_get_bios(adev))
2156                                return -EINVAL;
2157
2158                        r = amdgpu_atombios_init(adev);
2159                        if (r) {
2160                                dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2161                                amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2162                                return r;
2163                        }
2164
2165                        /* get pf2vf msg info at its earliest time */
2166                        if (amdgpu_sriov_vf(adev))
2167                                amdgpu_virt_init_data_exchange(adev);
2168
2169                }
2170        }
2171
2172        adev->cg_flags &= amdgpu_cg_mask;
2173        adev->pg_flags &= amdgpu_pg_mask;
2174
2175        return 0;
2176}
2177
2178static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2179{
2180        int i, r;
2181
2182        for (i = 0; i < adev->num_ip_blocks; i++) {
2183                if (!adev->ip_blocks[i].status.sw)
2184                        continue;
2185                if (adev->ip_blocks[i].status.hw)
2186                        continue;
2187                if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2188                    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2189                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2190                        r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2191                        if (r) {
2192                                DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2193                                          adev->ip_blocks[i].version->funcs->name, r);
2194                                return r;
2195                        }
2196                        adev->ip_blocks[i].status.hw = true;
2197                }
2198        }
2199
2200        return 0;
2201}
2202
2203static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2204{
2205        int i, r;
2206
2207        for (i = 0; i < adev->num_ip_blocks; i++) {
2208                if (!adev->ip_blocks[i].status.sw)
2209                        continue;
2210                if (adev->ip_blocks[i].status.hw)
2211                        continue;
2212                r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2213                if (r) {
2214                        DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2215                                  adev->ip_blocks[i].version->funcs->name, r);
2216                        return r;
2217                }
2218                adev->ip_blocks[i].status.hw = true;
2219        }
2220
2221        return 0;
2222}
2223
2224static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2225{
2226        int r = 0;
2227        int i;
2228        uint32_t smu_version;
2229
2230        if (adev->asic_type >= CHIP_VEGA10) {
2231                for (i = 0; i < adev->num_ip_blocks; i++) {
2232                        if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2233                                continue;
2234
2235                        if (!adev->ip_blocks[i].status.sw)
2236                                continue;
2237
2238                        /* no need to do the fw loading again if already done */
2239                        if (adev->ip_blocks[i].status.hw)
2240                                break;
2241
2242                        if (amdgpu_in_reset(adev) || adev->in_suspend) {
2243                                r = adev->ip_blocks[i].version->funcs->resume(adev);
2244                                if (r) {
2245                                        DRM_ERROR("resume of IP block <%s> failed %d\n",
2246                                                          adev->ip_blocks[i].version->funcs->name, r);
2247                                        return r;
2248                                }
2249                        } else {
2250                                r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2251                                if (r) {
2252                                        DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2253                                                          adev->ip_blocks[i].version->funcs->name, r);
2254                                        return r;
2255                                }
2256                        }
2257
2258                        adev->ip_blocks[i].status.hw = true;
2259                        break;
2260                }
2261        }
2262
2263        if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2264                r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2265
2266        return r;
2267}
2268
2269/**
2270 * amdgpu_device_ip_init - run init for hardware IPs
2271 *
2272 * @adev: amdgpu_device pointer
2273 *
2274 * Main initialization pass for hardware IPs.  The list of all the hardware
2275 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2276 * are run.  sw_init initializes the software state associated with each IP
2277 * and hw_init initializes the hardware associated with each IP.
2278 * Returns 0 on success, negative error code on failure.
2279 */
2280static int amdgpu_device_ip_init(struct amdgpu_device *adev)
2281{
2282        int i, r;
2283
2284        r = amdgpu_ras_init(adev);
2285        if (r)
2286                return r;
2287
2288        for (i = 0; i < adev->num_ip_blocks; i++) {
2289                if (!adev->ip_blocks[i].status.valid)
2290                        continue;
2291                r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2292                if (r) {
2293                        DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2294                                  adev->ip_blocks[i].version->funcs->name, r);
2295                        goto init_failed;
2296                }
2297                adev->ip_blocks[i].status.sw = true;
2298
2299                /* need to do gmc hw init early so we can allocate gpu mem */
2300                if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2301                        r = amdgpu_device_vram_scratch_init(adev);
2302                        if (r) {
2303                                DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
2304                                goto init_failed;
2305                        }
2306                        r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2307                        if (r) {
2308                                DRM_ERROR("hw_init %d failed %d\n", i, r);
2309                                goto init_failed;
2310                        }
2311                        r = amdgpu_device_wb_init(adev);
2312                        if (r) {
2313                                DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
2314                                goto init_failed;
2315                        }
2316                        adev->ip_blocks[i].status.hw = true;
2317
2318                        /* right after GMC hw init, we create CSA */
2319                        if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
2320                                r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2321                                                                AMDGPU_GEM_DOMAIN_VRAM,
2322                                                                AMDGPU_CSA_SIZE);
2323                                if (r) {
2324                                        DRM_ERROR("allocate CSA failed %d\n", r);
2325                                        goto init_failed;
2326                                }
2327                        }
2328                }
2329        }
2330
2331        if (amdgpu_sriov_vf(adev))
2332                amdgpu_virt_init_data_exchange(adev);
2333
2334        r = amdgpu_ib_pool_init(adev);
2335        if (r) {
2336                dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2337                amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2338                goto init_failed;
2339        }
2340
2341        r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete */
2342        if (r)
2343                goto init_failed;
2344
2345        r = amdgpu_device_ip_hw_init_phase1(adev);
2346        if (r)
2347                goto init_failed;
2348
2349        r = amdgpu_device_fw_loading(adev);
2350        if (r)
2351                goto init_failed;
2352
2353        r = amdgpu_device_ip_hw_init_phase2(adev);
2354        if (r)
2355                goto init_failed;
2356
2357        /*
2358         * Retired pages will be loaded from eeprom and reserved here;
2359         * this should be called after amdgpu_device_ip_hw_init_phase2 since
2360         * for some ASICs the RAS EEPROM code relies on the SMU being fully
2361         * functional for I2C communication, which is only true at this point.
2362         *
2363         * amdgpu_ras_recovery_init may fail, but the caller only cares about
2364         * failures caused by a bad GPU state, which stop the amdgpu init
2365         * process accordingly. For other failures it still releases all the
2366         * resources and prints an error message, rather than returning a
2367         * negative value to the upper level.
2368         *
2369         * Note: theoretically, this should be called before all VRAM
2370         * allocations to protect retired pages from being abused.
2371         */
2372        r = amdgpu_ras_recovery_init(adev);
2373        if (r)
2374                goto init_failed;
2375
2376        if (adev->gmc.xgmi.num_physical_nodes > 1)
2377                amdgpu_xgmi_add_device(adev);
2378
2379        /* Don't init kfd if the whole hive needs to be reset during init */
2380        if (!adev->gmc.xgmi.pending_reset)
2381                amdgpu_amdkfd_device_init(adev);
2382
2383        amdgpu_fru_get_product_info(adev);
2384
2385init_failed:
2386        if (amdgpu_sriov_vf(adev))
2387                amdgpu_virt_release_full_gpu(adev, true);
2388
2389        return r;
2390}
2391
2392/**
2393 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2394 *
2395 * @adev: amdgpu_device pointer
2396 *
2397 * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
2398 * this function before a GPU reset.  If the value is retained after a
2399 * GPU reset, VRAM has not been lost.  Some GPU resets may destroy VRAM contents.
2400 */
2401static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2402{
2403        memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2404}
2405
2406/**
2407 * amdgpu_device_check_vram_lost - check if vram is valid
2408 *
2409 * @adev: amdgpu_device pointer
2410 *
2411 * Checks the reset magic value written to the gart pointer in VRAM.
2412 * The driver calls this after a GPU reset to see if the contents of
2413 * VRAM have been lost or not.
2414 * Returns true if VRAM is lost, false if not.
2415 */
2416static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2417{
2418        if (memcmp(adev->gart.ptr, adev->reset_magic,
2419                        AMDGPU_RESET_MAGIC_NUM))
2420                return true;
2421
2422        if (!amdgpu_in_reset(adev))
2423                return false;
2424
2425        /*
2426         * For all ASICs with baco/mode1 reset, the VRAM is
2427         * always assumed to be lost.
2428         */
2429        switch (amdgpu_asic_reset_method(adev)) {
2430        case AMD_RESET_METHOD_BACO:
2431        case AMD_RESET_METHOD_MODE1:
2432                return true;
2433        default:
2434                return false;
2435        }
2436}
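
/*
 * Usage sketch (illustrative): a reset path can use the magic check to
 * decide whether VRAM contents must be restored after the reset; vram_lost
 * here is an assumed local bool:
 *
 *     vram_lost = amdgpu_device_check_vram_lost(adev);
 *     if (vram_lost)
 *         DRM_INFO("VRAM is lost due to GPU reset!\n");
 */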
2437
2438/**
2439 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2440 *
2441 * @adev: amdgpu_device pointer
2442 * @state: clockgating state (gate or ungate)
2443 *
2444 * The list of all the hardware IPs that make up the asic is walked and the
2445 * set_clockgating_state callbacks are run.
2446 * On late init this pass enables clockgating for the hardware IPs; on
2447 * fini or suspend it disables clockgating.
2448 * Returns 0 on success, negative error code on failure.
2449 */
2451int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2452                               enum amd_clockgating_state state)
2453{
2454        int i, j, r;
2455
2456        if (amdgpu_emu_mode == 1)
2457                return 0;
2458
2459        for (j = 0; j < adev->num_ip_blocks; j++) {
2460                i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2461                if (!adev->ip_blocks[i].status.late_initialized)
2462                        continue;
2463                /* skip CG for GFX on S0ix */
2464                if (adev->in_s0ix &&
2465                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
2466                        continue;
2467                /* skip CG for VCE/UVD, it's handled specially */
2468                if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2469                    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2470                    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2471                    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2472                    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
2473                        /* enable clockgating to save power */
2474                        r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
2475                                                                                     state);
2476                        if (r) {
2477                                DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
2478                                          adev->ip_blocks[i].version->funcs->name, r);
2479                                return r;
2480                        }
2481                }
2482        }
2483
2484        return 0;
2485}
2486
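/**
 * amdgpu_device_set_pg_state - set powergating for amdgpu device
 *
 * @adev: amdgpu_device pointer
 * @state: powergating state (gate or ungate)
 *
 * The list of all the hardware IPs that make up the asic is walked and the
 * set_powergating_state callbacks are run for each.
 * Returns 0 on success, negative error code on failure.
 */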
2487int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2488                               enum amd_powergating_state state)
2489{
2490        int i, j, r;
2491
2492        if (amdgpu_emu_mode == 1)
2493                return 0;
2494
2495        for (j = 0; j < adev->num_ip_blocks; j++) {
2496                i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2497                if (!adev->ip_blocks[i].status.late_initialized)
2498                        continue;
2499                /* skip PG for GFX on S0ix */
2500                if (adev->in_s0ix &&
2501                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
2502                        continue;
2503                /* skip PG for VCE/UVD, it's handled specially */
2504                if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2505                    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2506                    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2507                    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2508                    adev->ip_blocks[i].version->funcs->set_powergating_state) {
2509                        /* enable powergating to save power */
2510                        r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
2511                                                                                        state);
2512                        if (r) {
2513                                DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
2514                                          adev->ip_blocks[i].version->funcs->name, r);
2515                                return r;
2516                        }
2517                }
2518        }
2519        return 0;
2520}
2521
2522static int amdgpu_device_enable_mgpu_fan_boost(void)
2523{
2524        struct amdgpu_gpu_instance *gpu_ins;
2525        struct amdgpu_device *adev;
2526        int i, ret = 0;
2527
2528        mutex_lock(&mgpu_info.mutex);
2529
2530        /*
2531         * MGPU fan boost feature should be enabled
2532         * only when there are two or more dGPUs in
2533         * the system
2534         */
2535        if (mgpu_info.num_dgpu < 2)
2536                goto out;
2537
2538        for (i = 0; i < mgpu_info.num_dgpu; i++) {
2539                gpu_ins = &(mgpu_info.gpu_ins[i]);
2540                adev = gpu_ins->adev;
2541                if (!(adev->flags & AMD_IS_APU) &&
2542                    !gpu_ins->mgpu_fan_enabled) {
2543                        ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2544                        if (ret)
2545                                break;
2546
2547                        gpu_ins->mgpu_fan_enabled = 1;
2548                }
2549        }
2550
2551out:
2552        mutex_unlock(&mgpu_info.mutex);
2553
2554        return ret;
2555}
2556
2557/**
2558 * amdgpu_device_ip_late_init - run late init for hardware IPs
2559 *
2560 * @adev: amdgpu_device pointer
2561 *
2562 * Late initialization pass for hardware IPs.  The list of all the hardware
2563 * IPs that make up the asic is walked and the late_init callbacks are run.
2564 * late_init covers any special initialization that an IP requires
2565 * after all of the IPs have been initialized or something that needs to happen
2566 * late in the init process.
2567 * Returns 0 on success, negative error code on failure.
2568 */
2569static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2570{
2571        struct amdgpu_gpu_instance *gpu_instance;
2572        int i = 0, r;
2573
2574        for (i = 0; i < adev->num_ip_blocks; i++) {
2575                if (!adev->ip_blocks[i].status.hw)
2576                        continue;
2577                if (adev->ip_blocks[i].version->funcs->late_init) {
2578                        r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2579                        if (r) {
2580                                DRM_ERROR("late_init of IP block <%s> failed %d\n",
2581                                          adev->ip_blocks[i].version->funcs->name, r);
2582                                return r;
2583                        }
2584                }
2585                adev->ip_blocks[i].status.late_initialized = true;
2586        }
2587
2588        amdgpu_ras_set_error_query_ready(adev, true);
2589
2590        amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2591        amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
2592
2593        amdgpu_device_fill_reset_magic(adev);
2594
2595        r = amdgpu_device_enable_mgpu_fan_boost();
2596        if (r)
2597                DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2598
2599        /* For XGMI + passthrough configuration on arcturus, enable light SBR */
2600        if (adev->asic_type == CHIP_ARCTURUS &&
2601            amdgpu_passthrough(adev) &&
2602            adev->gmc.xgmi.num_physical_nodes > 1)
2603                smu_set_light_sbr(&adev->smu, true);
2604
2605        if (adev->gmc.xgmi.num_physical_nodes > 1) {
2606                mutex_lock(&mgpu_info.mutex);
2607
2608                /*
2609                 * Reset the device p-state to low, as it was booted with high.
2610                 *
2611                 * This should be performed only after all devices from the same
2612                 * hive are initialized.
2613                 *
2614                 * However, the number of devices in a hive is not known in
2615                 * advance; it is counted one by one as each device initializes.
2616                 *
2617                 * So we wait until all XGMI interlinked devices are initialized.
2618                 * This may bring some delay, since those devices may come from
2619                 * different hives, but that should be OK.
2620                 */
2621                if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2622                        for (i = 0; i < mgpu_info.num_gpu; i++) {
2623                                gpu_instance = &(mgpu_info.gpu_ins[i]);
2624                                if (gpu_instance->adev->flags & AMD_IS_APU)
2625                                        continue;
2626
2627                                r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2628                                                AMDGPU_XGMI_PSTATE_MIN);
2629                                if (r) {
2630                                        DRM_ERROR("pstate setting failed (%d).\n", r);
2631                                        break;
2632                                }
2633                        }
2634                }
2635
2636                mutex_unlock(&mgpu_info.mutex);
2637        }
2638
2639        return 0;
2640}
2641
2642static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
2643{
2644        int i, r;
2645
2646        for (i = 0; i < adev->num_ip_blocks; i++) {
2647                if (!adev->ip_blocks[i].version->funcs->early_fini)
2648                        continue;
2649
2650                r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2651                if (r) {
2652                        DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
2653                                  adev->ip_blocks[i].version->funcs->name, r);
2654                }
2655        }
2656
2657        amdgpu_amdkfd_suspend(adev, false);
2658
2659        amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2660        amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2661
2662        /* need to disable SMC first */
2663        for (i = 0; i < adev->num_ip_blocks; i++) {
2664                if (!adev->ip_blocks[i].status.hw)
2665                        continue;
2666                if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2667                        r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2668                        /* XXX handle errors */
2669                        if (r) {
2670                                DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2671                                          adev->ip_blocks[i].version->funcs->name, r);
2672                        }
2673                        adev->ip_blocks[i].status.hw = false;
2674                        break;
2675                }
2676        }
2677
2678        for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2679                if (!adev->ip_blocks[i].status.hw)
2680                        continue;
2681
2682                r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2683                /* XXX handle errors */
2684                if (r) {
2685                        DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2686                                  adev->ip_blocks[i].version->funcs->name, r);
2687                }
2688
2689                adev->ip_blocks[i].status.hw = false;
2690        }
2691
2692        return 0;
2693}
2694
2695/**
2696 * amdgpu_device_ip_fini - run fini for hardware IPs
2697 *
2698 * @adev: amdgpu_device pointer
2699 *
2700 * Main teardown pass for hardware IPs.  The list of all the hardware
2701 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2702 * are run.  hw_fini tears down the hardware associated with each IP
2703 * and sw_fini tears down any software state associated with each IP.
2704 * Returns 0 on success, negative error code on failure.
2705 */
2706static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2707{
2708        int i, r;
2709
2710        if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2711                amdgpu_virt_release_ras_err_handler_data(adev);
2712
2713        amdgpu_ras_pre_fini(adev);
2714
2715        if (adev->gmc.xgmi.num_physical_nodes > 1)
2716                amdgpu_xgmi_remove_device(adev);
2717
2718        amdgpu_amdkfd_device_fini_sw(adev);
2719
2720        for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2721                if (!adev->ip_blocks[i].status.sw)
2722                        continue;
2723
2724                if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2725                        amdgpu_ucode_free_bo(adev);
2726                        amdgpu_free_static_csa(&adev->virt.csa_obj);
2727                        amdgpu_device_wb_fini(adev);
2728                        amdgpu_device_vram_scratch_fini(adev);
2729                        amdgpu_ib_pool_fini(adev);
2730                }
2731
2732                r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
2733                /* XXX handle errors */
2734                if (r) {
2735                        DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2736                                  adev->ip_blocks[i].version->funcs->name, r);
2737                }
2738                adev->ip_blocks[i].status.sw = false;
2739                adev->ip_blocks[i].status.valid = false;
2740        }
2741
2742        for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2743                if (!adev->ip_blocks[i].status.late_initialized)
2744                        continue;
2745                if (adev->ip_blocks[i].version->funcs->late_fini)
2746                        adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2747                adev->ip_blocks[i].status.late_initialized = false;
2748        }
2749
2750        amdgpu_ras_fini(adev);
2751
2752        if (amdgpu_sriov_vf(adev))
2753                if (amdgpu_virt_release_full_gpu(adev, false))
2754                        DRM_ERROR("failed to release exclusive mode on fini\n");
2755
2756        return 0;
2757}
2758
2759/**
2760 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
2761 *
2762 * @work: work_struct.
2763 */
2764static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2765{
2766        struct amdgpu_device *adev =
2767                container_of(work, struct amdgpu_device, delayed_init_work.work);
2768        int r;
2769
2770        r = amdgpu_ib_ring_tests(adev);
2771        if (r)
2772                DRM_ERROR("ib ring test failed (%d).\n", r);
2773}
2774
2775static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2776{
2777        struct amdgpu_device *adev =
2778                container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2779
2780        WARN_ON_ONCE(adev->gfx.gfx_off_state);
2781        WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
2782
2783        if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2784                adev->gfx.gfx_off_state = true;
2785}
2786
2787/**
2788 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
2789 *
2790 * @adev: amdgpu_device pointer
2791 *
2792 * Main suspend function for hardware IPs.  The list of all the hardware
2793 * IPs that make up the asic is walked, clockgating is disabled and the
2794 * suspend callbacks are run for the display IPs (DCE).  suspend puts the
2795 * hardware and software state of each IP into a state suitable for suspend.
2796 * Returns 0 on success, negative error code on failure.
2797 */
2798static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2799{
2800        int i, r;
2801
2802        amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2803        amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2804
2805        for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2806                if (!adev->ip_blocks[i].status.valid)
2807                        continue;
2808
2809                /* displays are handled separately */
2810                if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2811                        continue;
2812
2814                r = adev->ip_blocks[i].version->funcs->suspend(adev);
2816                if (r) {
2817                        DRM_ERROR("suspend of IP block <%s> failed %d\n",
2818                                  adev->ip_blocks[i].version->funcs->name, r);
2819                        return r;
2820                }
2821
2822                adev->ip_blocks[i].status.hw = false;
2823        }
2824
2825        return 0;
2826}
2827
2828/**
2829 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2830 *
2831 * @adev: amdgpu_device pointer
2832 *
2833 * Main suspend function for hardware IPs.  The list of all the hardware
2834 * IPs that make up the asic is walked, clockgating is disabled and the
2835 * suspend callbacks are run for all remaining IPs.  suspend puts the
2836 * hardware and software state of each IP into a state suitable for suspend.
2837 * Returns 0 on success, negative error code on failure.
2838 */
2839static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
2840{
2841        int i, r;
2842
2843        if (adev->in_s0ix)
2844                amdgpu_gfx_state_change_set(adev, sGpuChangeState_D3Entry);
2845
2846        for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2847                if (!adev->ip_blocks[i].status.valid)
2848                        continue;
2849                /* displays are handled in phase1 */
2850                if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2851                        continue;
2852                /* PSP lost connection when err_event_athub occurs */
2853                if (amdgpu_ras_intr_triggered() &&
2854                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2855                        adev->ip_blocks[i].status.hw = false;
2856                        continue;
2857                }
2858
2859                /* skip unnecessary suspend if we have not initialized them yet */
2860                if (adev->gmc.xgmi.pending_reset &&
2861                    !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2862                      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
2863                      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2864                      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
2865                        adev->ip_blocks[i].status.hw = false;
2866                        continue;
2867                }
2868
2869                /* skip suspend of gfx and psp for S0ix
2870                 * gfx is in gfxoff state, so on resume it will exit gfxoff just
2871                 * like at runtime. PSP is also part of the always on hardware
2872                 * so no need to suspend it.
2873                 */
2874                if (adev->in_s0ix &&
2875                    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
2876                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX))
2877                        continue;
2878
2880                r = adev->ip_blocks[i].version->funcs->suspend(adev);
2881                /* XXX handle errors */
2882                if (r) {
2883                        DRM_ERROR("suspend of IP block <%s> failed %d\n",
2884                                  adev->ip_blocks[i].version->funcs->name, r);
2885                }
2886                adev->ip_blocks[i].status.hw = false;
2887                /* handle putting the SMC in the appropriate state */
2888                if (!amdgpu_sriov_vf(adev)) {
2889                        if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2890                                r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
2891                                if (r) {
2892                                        DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2893                                                        adev->mp1_state, r);
2894                                        return r;
2895                                }
2896                        }
2897                }
2898        }
2899
2900        return 0;
2901}
2902
2903/**
2904 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2905 *
2906 * @adev: amdgpu_device pointer
2907 *
2908 * Main suspend function for hardware IPs.  The list of all the hardware
2909 * IPs that make up the asic is walked, clockgating is disabled and the
2910 * suspend callbacks are run.  suspend puts the hardware and software state
2911 * in each IP into a state suitable for suspend.
2912 * Returns 0 on success, negative error code on failure.
2913 */
2914int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2915{
2916        int r;
2917
2918        if (amdgpu_sriov_vf(adev)) {
2919                amdgpu_virt_fini_data_exchange(adev);
2920                amdgpu_virt_request_full_gpu(adev, false);
2921        }
2922
2923        r = amdgpu_device_ip_suspend_phase1(adev);
2924        if (r)
2925                return r;
2926        r = amdgpu_device_ip_suspend_phase2(adev);
2927
2928        if (amdgpu_sriov_vf(adev))
2929                amdgpu_virt_release_full_gpu(adev, false);
2930
2931        return r;
2932}
2933
2934static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
2935{
2936        int i, r;
2937
2938        static enum amd_ip_block_type ip_order[] = {
2939                AMD_IP_BLOCK_TYPE_GMC,
2940                AMD_IP_BLOCK_TYPE_COMMON,
2941                AMD_IP_BLOCK_TYPE_PSP,
2942                AMD_IP_BLOCK_TYPE_IH,
2943        };
2944
2945        for (i = 0; i < adev->num_ip_blocks; i++) {
2946                int j;
2947                struct amdgpu_ip_block *block;
2948
2949                block = &adev->ip_blocks[i];
2950                block->status.hw = false;
2951
2952                for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2953
2954                        if (block->version->type != ip_order[j] ||
2955                                !block->status.valid)
2956                                continue;
2957
2958                        r = block->version->funcs->hw_init(adev);
2959                        DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
2960                        if (r)
2961                                return r;
2962                        block->status.hw = true;
2963                }
2964        }
2965
2966        return 0;
2967}
2968
2969static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
2970{
2971        int i, r;
2972
2973        static enum amd_ip_block_type ip_order[] = {
2974                AMD_IP_BLOCK_TYPE_SMC,
2975                AMD_IP_BLOCK_TYPE_DCE,
2976                AMD_IP_BLOCK_TYPE_GFX,
2977                AMD_IP_BLOCK_TYPE_SDMA,
2978                AMD_IP_BLOCK_TYPE_UVD,
2979                AMD_IP_BLOCK_TYPE_VCE,
2980                AMD_IP_BLOCK_TYPE_VCN
2981        };
2982
2983        for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2984                int j;
2985                struct amdgpu_ip_block *block;
2986
2987                for (j = 0; j < adev->num_ip_blocks; j++) {
2988                        block = &adev->ip_blocks[j];
2989
2990                        if (block->version->type != ip_order[i] ||
2991                                !block->status.valid ||
2992                                block->status.hw)
2993                                continue;
2994
2995                        if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
2996                                r = block->version->funcs->resume(adev);
2997                        else
2998                                r = block->version->funcs->hw_init(adev);
2999
3000                        DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
3001                        if (r)
3002                                return r;
3003                        block->status.hw = true;
3004                }
3005        }
3006
3007        return 0;
3008}
3009
3010/**
3011 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3012 *
3013 * @adev: amdgpu_device pointer
3014 *
3015 * First resume function for hardware IPs.  The list of all the hardware
3016 * IPs that make up the asic is walked and the resume callbacks are run for
3017 * COMMON, GMC, and IH.  resume puts the hardware into a functional state
3018 * after a suspend and updates the software state as necessary.  This
3019 * function is also used for restoring the GPU after a GPU reset.
3020 * Returns 0 on success, negative error code on failure.
3021 */
3022static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3023{
3024        int i, r;
3025
3026        for (i = 0; i < adev->num_ip_blocks; i++) {
3027                if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3028                        continue;
3029                if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3030                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3031                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
3032
3033                        r = adev->ip_blocks[i].version->funcs->resume(adev);
3034                        if (r) {
3035                                DRM_ERROR("resume of IP block <%s> failed %d\n",
3036                                          adev->ip_blocks[i].version->funcs->name, r);
3037                                return r;
3038                        }
3039                        adev->ip_blocks[i].status.hw = true;
3040                }
3041        }
3042
3043        return 0;
3044}
3045
3046/**
3047 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3048 *
3049 * @adev: amdgpu_device pointer
3050 *
3051 * Second resume function for hardware IPs.  The list of all the hardware
3052 * IPs that make up the asic is walked and the resume callbacks are run for
3053 * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
3054 * functional state after a suspend and updates the software state as
3055 * necessary.  This function is also used for restoring the GPU after a GPU
3056 * reset.
3057 * Returns 0 on success, negative error code on failure.
3058 */
3059static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3060{
3061        int i, r;
3062
3063        for (i = 0; i < adev->num_ip_blocks; i++) {
3064                if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3065                        continue;
3066                if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3067                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3068                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3069                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3070                        continue;
3071                r = adev->ip_blocks[i].version->funcs->resume(adev);
3072                if (r) {
3073                        DRM_ERROR("resume of IP block <%s> failed %d\n",
3074                                  adev->ip_blocks[i].version->funcs->name, r);
3075                        return r;
3076                }
3077                adev->ip_blocks[i].status.hw = true;
3078        }
3079
3080        return 0;
3081}
3082
3083/**
3084 * amdgpu_device_ip_resume - run resume for hardware IPs
3085 *
3086 * @adev: amdgpu_device pointer
3087 *
3088 * Main resume function for hardware IPs.  The hardware IPs
3089 * are split into two resume functions because they are
3090 * also used in recovering from a GPU reset and some additional
3091 * steps need to be taken between them.  In this case (S3/S4) they are
3092 * run sequentially.
3093 * Returns 0 on success, negative error code on failure.
3094 */
3095static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
3096{
3097        int r;
3098
3099        r = amdgpu_device_ip_resume_phase1(adev);
3100        if (r)
3101                return r;
3102
3103        r = amdgpu_device_fw_loading(adev);
3104        if (r)
3105                return r;
3106
3107        r = amdgpu_device_ip_resume_phase2(adev);
3108
3109        return r;
3110}
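    /*
     * Illustrative sketch only (not driver code): the split described in the
     * kernel-doc above lets a reset path interleave extra work between the
     * two resume phases.  The helper name example_reset_resume() is
     * hypothetical.
     */
    #if 0
    static int example_reset_resume(struct amdgpu_device *adev)
    {
            int r;

            r = amdgpu_device_ip_resume_phase1(adev);  /* COMMON, GMC, IH */
            if (r)
                    return r;

            /* ...reset-specific steps (e.g. VRAM recovery) would go here... */

            r = amdgpu_device_fw_loading(adev);        /* reload microcode */
            if (r)
                    return r;

            return amdgpu_device_ip_resume_phase2(adev); /* remaining blocks */
    }
    #endif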
3111
3112/**
3113 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3114 *
3115 * @adev: amdgpu_device pointer
3116 *
3117 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3118 */
3119static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3120{
3121        if (amdgpu_sriov_vf(adev)) {
3122                if (adev->is_atom_fw) {
3123                        if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3124                                adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3125                } else {
3126                        if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3127                                adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3128                }
3129
3130                if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3131                        amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3132        }
3133}
3134
3135/**
3136 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3137 *
3138 * @asic_type: AMD asic type
3139 *
3140 * Check if there is DC (new modesetting infrastructure) support for an asic.
3141 * Returns true if DC has support, false if not.
3142 */
3143bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3144{
3145        switch (asic_type) {
3146#if defined(CONFIG_DRM_AMD_DC)
3147#if defined(CONFIG_DRM_AMD_DC_SI)
3148        case CHIP_TAHITI:
3149        case CHIP_PITCAIRN:
3150        case CHIP_VERDE:
3151        case CHIP_OLAND:
3152#endif
3153        case CHIP_BONAIRE:
3154        case CHIP_KAVERI:
3155        case CHIP_KABINI:
3156        case CHIP_MULLINS:
3157                /*
3158                 * We have systems in the wild with these ASICs that require
3159                 * LVDS and VGA support which is not supported with DC.
3160                 *
3161                 * Fallback to the non-DC driver here by default so as not to
3162                 * cause regressions.
3163                 */
3164                return amdgpu_dc > 0;
3165        case CHIP_HAWAII:
3166        case CHIP_CARRIZO:
3167        case CHIP_STONEY:
3168        case CHIP_POLARIS10:
3169        case CHIP_POLARIS11:
3170        case CHIP_POLARIS12:
3171        case CHIP_VEGAM:
3172        case CHIP_TONGA:
3173        case CHIP_FIJI:
3174        case CHIP_VEGA10:
3175        case CHIP_VEGA12:
3176        case CHIP_VEGA20:
3177#if defined(CONFIG_DRM_AMD_DC_DCN)
3178        case CHIP_RAVEN:
3179        case CHIP_NAVI10:
3180        case CHIP_NAVI14:
3181        case CHIP_NAVI12:
3182        case CHIP_RENOIR:
3183        case CHIP_SIENNA_CICHLID:
3184        case CHIP_NAVY_FLOUNDER:
3185        case CHIP_DIMGREY_CAVEFISH:
3186        case CHIP_BEIGE_GOBY:
3187        case CHIP_VANGOGH:
3188        case CHIP_YELLOW_CARP:
3189#endif
3190                return amdgpu_dc != 0;
3191#endif
3192        default:
3193                if (amdgpu_dc > 0)
3194                        DRM_INFO_ONCE("Display Core has been requested via kernel parameter "
3195                                         "but isn't supported by ASIC, ignoring\n");
3196                return false;
3197        }
3198}
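    /*
     * Illustrative usage note (assumption: standard module-parameter
     * syntax): booting with amdgpu.dc=1 opts the legacy ASICs above into
     * DC, while amdgpu.dc=0 forces the non-DC path where both exist.
     */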
3199
3200/**
3201 * amdgpu_device_has_dc_support - check if dc is supported
3202 *
3203 * @adev: amdgpu_device pointer
3204 *
3205 * Returns true for supported, false for not supported
3206 */
3207bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3208{
3209        if (amdgpu_sriov_vf(adev) ||
3210            adev->enable_virtual_display ||
3211            (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
3212                return false;
3213
3214        return amdgpu_device_asic_has_dc_support(adev->asic_type);
3215}
3216
3217static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3218{
3219        struct amdgpu_device *adev =
3220                container_of(__work, struct amdgpu_device, xgmi_reset_work);
3221        struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3222
3223        /* It's a bug to not have a hive within this function */
3224        if (WARN_ON(!hive))
3225                return;
3226
3227        /*
3228         * Use task barrier to synchronize all xgmi reset works across the
3229         * hive. task_barrier_enter and task_barrier_exit will block
3230         * until all the threads running the xgmi reset works reach
3231         * those points. task_barrier_full will do both blocks.
3232         */
3233        if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3234
3235                task_barrier_enter(&hive->tb);
3236                adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
3237
3238                if (adev->asic_reset_res)
3239                        goto fail;
3240
3241                task_barrier_exit(&hive->tb);
3242                adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
3243
3244                if (adev->asic_reset_res)
3245                        goto fail;
3246
3247                if (adev->mmhub.ras_funcs &&
3248                    adev->mmhub.ras_funcs->reset_ras_error_count)
3249                        adev->mmhub.ras_funcs->reset_ras_error_count(adev);
3250        } else {
3251
3252                task_barrier_full(&hive->tb);
3253                adev->asic_reset_res =  amdgpu_asic_reset(adev);
3254        }
3255
3256fail:
3257        if (adev->asic_reset_res)
3258                DRM_WARN("ASIC reset failed with error %d for drm dev %s\n",
3259                         adev->asic_reset_res, adev_to_drm(adev)->unique);
3260        amdgpu_put_xgmi_hive(hive);
3261}
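    /*
     * The barrier pattern above, reduced to its skeleton (illustrative
     * sketch only, not driver code): one worker runs per device in the
     * hive, and the barrier keeps BACO entry and exit in lockstep across
     * all of them.
     */
    #if 0
    static void example_hive_worker(struct amdgpu_device *adev,
                                    struct amdgpu_hive_info *hive)
    {
            task_barrier_enter(&hive->tb);  /* wait for all workers to arrive */
            /* ...enter BACO on this device... */
            task_barrier_exit(&hive->tb);   /* wait for all workers to enter */
            /* ...exit BACO on this device... */
    }
    #endif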
3262
3263static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3264{
3265        char *input = amdgpu_lockup_timeout;
3266        char *timeout_setting = NULL;
3267        int index = 0;
3268        long timeout;
3269        int ret = 0;
3270
3271        /*
3272         * By default the timeout for non-compute jobs is 10000 ms
3273         * and 60000 ms for compute jobs.
3274         * Under SR-IOV the compute timeout is 60000 ms only in
3275         * pp_one_vf mode; otherwise it defaults to 10000 ms.
3276         */
3277        adev->gfx_timeout = msecs_to_jiffies(10000);
3278        adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3279        if (amdgpu_sriov_vf(adev))
3280                adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3281                                        msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
3282        else
3283                adev->compute_timeout = msecs_to_jiffies(60000);
3284
3285        if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3286                while ((timeout_setting = strsep(&input, ",")) &&
3287                                strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3288                        ret = kstrtol(timeout_setting, 0, &timeout);
3289                        if (ret)
3290                                return ret;
3291
3292                        if (timeout == 0) {
3293                                index++;
3294                                continue;
3295                        } else if (timeout < 0) {
3296                                timeout = MAX_SCHEDULE_TIMEOUT;
3297                        } else {
3298                                timeout = msecs_to_jiffies(timeout);
3299                        }
3300
3301                        switch (index++) {
3302                        case 0:
3303                                adev->gfx_timeout = timeout;
3304                                break;
3305                        case 1:
3306                                adev->compute_timeout = timeout;
3307                                break;
3308                        case 2:
3309                                adev->sdma_timeout = timeout;
3310                                break;
3311                        case 3:
3312                                adev->video_timeout = timeout;
3313                                break;
3314                        default:
3315                                break;
3316                        }
3317                }
3318                /*
3319                 * There is only one value specified and
3320                 * it should apply to all non-compute jobs.
3321                 */
3322                if (index == 1) {
3323                        adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3324                        if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3325                                adev->compute_timeout = adev->gfx_timeout;
3326                }
3327        }
3328
3329        return ret;
3330}
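    /*
     * Illustrative parameter usage (assumption: values in ms, matching the
     * parsing above; 0 keeps a default, negative means no timeout):
     *
     *   amdgpu.lockup_timeout=10000
     *       one value: applies to gfx, sdma and video (and to compute
     *       under SR-IOV or passthrough)
     *   amdgpu.lockup_timeout=10000,60000,10000,10000
     *       four values: gfx, compute, sdma, video, in that order
     */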
3331
3332static const struct attribute *amdgpu_dev_attributes[] = {
3333        &dev_attr_product_name.attr,
3334        &dev_attr_product_number.attr,
3335        &dev_attr_serial_number.attr,
3336        &dev_attr_pcie_replay_count.attr,
3337        NULL
3338};
3339
3340/**
3341 * amdgpu_device_init - initialize the driver
3342 *
3343 * @adev: amdgpu_device pointer
3344 * @flags: driver flags
3345 *
3346 * Initializes the driver info and hw (all asics).
3347 * Returns 0 for success or an error on failure.
3348 * Called at driver startup.
3349 */
3350int amdgpu_device_init(struct amdgpu_device *adev,
3351                       uint32_t flags)
3352{
3353        struct drm_device *ddev = adev_to_drm(adev);
3354        struct pci_dev *pdev = adev->pdev;
3355        int r, i;
3356        bool px = false;
3357        u32 max_MBps;
3358
3359        adev->shutdown = false;
3360        adev->flags = flags;
3361
3362        if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3363                adev->asic_type = amdgpu_force_asic_type;
3364        else
3365                adev->asic_type = flags & AMD_ASIC_MASK;
3366
3367        adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
3368        if (amdgpu_emu_mode == 1)
3369                adev->usec_timeout *= 10;
3370        adev->gmc.gart_size = 512 * 1024 * 1024;
3371        adev->accel_working = false;
3372        adev->num_rings = 0;
3373        adev->mman.buffer_funcs = NULL;
3374        adev->mman.buffer_funcs_ring = NULL;
3375        adev->vm_manager.vm_pte_funcs = NULL;
3376        adev->vm_manager.vm_pte_num_scheds = 0;
3377        adev->gmc.gmc_funcs = NULL;
3378        adev->harvest_ip_mask = 0x0;
3379        adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
3380        bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
3381
3382        adev->smc_rreg = &amdgpu_invalid_rreg;
3383        adev->smc_wreg = &amdgpu_invalid_wreg;
3384        adev->pcie_rreg = &amdgpu_invalid_rreg;
3385        adev->pcie_wreg = &amdgpu_invalid_wreg;
3386        adev->pciep_rreg = &amdgpu_invalid_rreg;
3387        adev->pciep_wreg = &amdgpu_invalid_wreg;
3388        adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3389        adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
3390        adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3391        adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3392        adev->didt_rreg = &amdgpu_invalid_rreg;
3393        adev->didt_wreg = &amdgpu_invalid_wreg;
3394        adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3395        adev->gc_cac_wreg = &amdgpu_invalid_wreg;
3396        adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3397        adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3398
3399        DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3400                 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3401                 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
3402
3403        /* mutex initialization is all done here so we
3404         * can call these functions again without locking issues */
3405        mutex_init(&adev->firmware.mutex);
3406        mutex_init(&adev->pm.mutex);
3407        mutex_init(&adev->gfx.gpu_clock_mutex);
3408        mutex_init(&adev->srbm_mutex);
3409        mutex_init(&adev->gfx.pipe_reserve_mutex);
3410        mutex_init(&adev->gfx.gfx_off_mutex);
3411        mutex_init(&adev->grbm_idx_mutex);
3412        mutex_init(&adev->mn_lock);
3413        mutex_init(&adev->virt.vf_errors.lock);
3414        hash_init(adev->mn_hash);
3415        atomic_set(&adev->in_gpu_reset, 0);
3416        init_rwsem(&adev->reset_sem);
3417        mutex_init(&adev->psp.mutex);
3418        mutex_init(&adev->notifier_lock);
3419
3420        r = amdgpu_device_init_apu_flags(adev);
3421        if (r)
3422                return r;
3423
3424        r = amdgpu_device_check_arguments(adev);
3425        if (r)
3426                return r;
3427
3428        spin_lock_init(&adev->mmio_idx_lock);
3429        spin_lock_init(&adev->smc_idx_lock);
3430        spin_lock_init(&adev->pcie_idx_lock);
3431        spin_lock_init(&adev->uvd_ctx_idx_lock);
3432        spin_lock_init(&adev->didt_idx_lock);
3433        spin_lock_init(&adev->gc_cac_idx_lock);
3434        spin_lock_init(&adev->se_cac_idx_lock);
3435        spin_lock_init(&adev->audio_endpt_idx_lock);
3436        spin_lock_init(&adev->mm_stats.lock);
3437
3438        INIT_LIST_HEAD(&adev->shadow_list);
3439        mutex_init(&adev->shadow_list_lock);
3440
3441        INIT_LIST_HEAD(&adev->reset_list);
3442
3443        INIT_DELAYED_WORK(&adev->delayed_init_work,
3444                          amdgpu_device_delayed_init_work_handler);
3445        INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3446                          amdgpu_device_delay_enable_gfx_off);
3447
3448        INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3449
3450        adev->gfx.gfx_off_req_count = 1;
3451        adev->pm.ac_power = power_supply_is_system_supplied() > 0;
3452
3453        atomic_set(&adev->throttling_logging_enabled, 1);
3454        /*
3455         * If throttling continues, logging will be performed every minute
3456         * to avoid log flooding. "-1" is subtracted since the thermal
3457         * throttling interrupt comes every second. Thus, the total logging
3458         * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3459         * for throttling interrupt) = 60 seconds.
3460         */
3461        ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3462        ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
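            /*
             * Illustrative sketch (assumption: a logging site elsewhere
             * consumes this state through __ratelimit()):
             *
             *   if (__ratelimit(&adev->throttling_logging_rs))
             *           dev_warn(adev->dev, "thermal throttling detected\n");
             */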
3463
3464        /* Registers mapping */
3465        /* TODO: block userspace mapping of io register */
3466        if (adev->asic_type >= CHIP_BONAIRE) {
3467                adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3468                adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3469        } else {
3470                adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3471                adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3472        }
3473
3474        adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3475        if (adev->rmmio == NULL)
3476                return -ENOMEM;
3478        DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3479        DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3480
3481        /* enable PCIE atomic ops */
3482        r = pci_enable_atomic_ops_to_root(adev->pdev,
3483                                          PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3484                                          PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3485        if (r) {
3486                adev->have_atomics_support = false;
3487                DRM_INFO("PCIE atomic ops are not supported\n");
3488        } else {
3489                adev->have_atomics_support = true;
3490        }
3491
3492        amdgpu_device_get_pcie_info(adev);
3493
3494        if (amdgpu_mcbp)
3495                DRM_INFO("MCBP is enabled\n");
3496
3497        if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
3498                adev->enable_mes = true;
3499
3500        /* detect hw virtualization here */
3501        amdgpu_detect_virtualization(adev);
3502
3503        r = amdgpu_device_get_job_timeout_settings(adev);
3504        if (r) {
3505                dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
3506                return r;
3507        }
3508
3509        /* early init functions */
3510        r = amdgpu_device_ip_early_init(adev);
3511        if (r)
3512                return r;
3513
3514        /* doorbell bar mapping and doorbell index init */
3515        amdgpu_device_doorbell_init(adev);
3516
3517        if (amdgpu_emu_mode == 1) {
3518                /* post the asic on emulation mode */
3519                emu_soc_asic_init(adev);
3520                goto fence_driver_init;
3521        }
3522
3523        amdgpu_reset_init(adev);
3524
3525        /* detect if we are with an SRIOV vbios */
3526        amdgpu_device_detect_sriov_bios(adev);
3527
3528        /* check if we need to reset the asic
3529         *  E.g., driver was not cleanly unloaded previously, etc.
3530         */
3531        if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
3532                if (adev->gmc.xgmi.num_physical_nodes) {
3533                        dev_info(adev->dev, "Pending hive reset.\n");
3534                        adev->gmc.xgmi.pending_reset = true;
3535                        /* Only need to init necessary block for SMU to handle the reset */
3536                        for (i = 0; i < adev->num_ip_blocks; i++) {
3537                                if (!adev->ip_blocks[i].status.valid)
3538                                        continue;
3539                                if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3540                                      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3541                                      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3542                                      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
3543                                        DRM_DEBUG("IP %s disabled for hw_init.\n",
3544                                                adev->ip_blocks[i].version->funcs->name);
3545                                        adev->ip_blocks[i].status.hw = true;
3546                                }
3547                        }
3548                } else {
3549                        r = amdgpu_asic_reset(adev);
3550                        if (r) {
3551                                dev_err(adev->dev, "asic reset on init failed\n");
3552                                goto failed;
3553                        }
3554                }
3555        }
3556
3557        pci_enable_pcie_error_reporting(adev->pdev);
3558
3559        /* Post card if necessary */
3560        if (amdgpu_device_need_post(adev)) {
3561                if (!adev->bios) {
3562                        dev_err(adev->dev, "no vBIOS found\n");
3563                        r = -EINVAL;
3564                        goto failed;
3565                }
3566                DRM_INFO("GPU posting now...\n");
3567                r = amdgpu_device_asic_init(adev);
3568                if (r) {
3569                        dev_err(adev->dev, "gpu post error!\n");
3570                        goto failed;
3571                }
3572        }
3573
3574        if (adev->is_atom_fw) {
3575                /* Initialize clocks */
3576                r = amdgpu_atomfirmware_get_clock_info(adev);
3577                if (r) {
3578                        dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
3579                        amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3580                        goto failed;
3581                }
3582        } else {
3583                /* Initialize clocks */
3584                r = amdgpu_atombios_get_clock_info(adev);
3585                if (r) {
3586                        dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
3587                        amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3588                        goto failed;
3589                }
3590                /* init i2c buses */
3591                if (!amdgpu_device_has_dc_support(adev))
3592                        amdgpu_atombios_i2c_init(adev);
3593        }
3594
3595fence_driver_init:
3596        /* Fence driver */
3597        r = amdgpu_fence_driver_init(adev);
3598        if (r) {
3599                dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
3600                amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
3601                goto failed;
3602        }
3603
3604        /* init the mode config */
3605        drm_mode_config_init(adev_to_drm(adev));
3606
3607        r = amdgpu_device_ip_init(adev);
3608        if (r) {
3609                /* failed in exclusive mode due to timeout */
3610                if (amdgpu_sriov_vf(adev) &&
3611                    !amdgpu_sriov_runtime(adev) &&
3612                    amdgpu_virt_mmio_blocked(adev) &&
3613                    !amdgpu_virt_wait_reset(adev)) {
3614                        dev_err(adev->dev, "VF exclusive mode timeout\n");
3615                        /* Don't send request since VF is inactive. */
3616                        adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3617                        adev->virt.ops = NULL;
3618                        r = -EAGAIN;
3619                        goto release_ras_con;
3620                }
3621                dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
3622                amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
3623                goto release_ras_con;
3624        }
3625
3626        dev_info(adev->dev,
3627                "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
3628                        adev->gfx.config.max_shader_engines,
3629                        adev->gfx.config.max_sh_per_se,
3630                        adev->gfx.config.max_cu_per_sh,
3631                        adev->gfx.cu_info.number);
3632
3633        adev->accel_working = true;
3634
3635        amdgpu_vm_check_compute_bug(adev);
3636
3637        /* Initialize the buffer migration limit. */
3638        if (amdgpu_moverate >= 0)
3639                max_MBps = amdgpu_moverate;
3640        else
3641                max_MBps = 8; /* Allow 8 MB/s. */
3642        /* Get a log2 for easy divisions. */
3643        adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
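            /*
             * Illustrative note: with the log2 in hand, later throughput
             * math can turn a division by max_MBps into a shift, roughly
             * x / max_MBps ~= x >> adev->mm_stats.log2_max_MBps.
             */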
3644
3645        amdgpu_fbdev_init(adev);
3646
3647        r = amdgpu_pm_sysfs_init(adev);
3648        if (r) {
3649                adev->pm_sysfs_en = false;
3650                DRM_ERROR("registering pm sysfs failed (%d).\n", r);
3651        } else
3652                adev->pm_sysfs_en = true;
3653
3654        r = amdgpu_ucode_sysfs_init(adev);
3655        if (r) {
3656                adev->ucode_sysfs_en = false;
3657                DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
3658        } else
3659                adev->ucode_sysfs_en = true;
3660
3661        if (amdgpu_testing & 1) {
3662                if (adev->accel_working)
3663                        amdgpu_test_moves(adev);
3664                else
3665                        DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
3666        }
3667        if (amdgpu_benchmarking) {
3668                if (adev->accel_working)
3669                        amdgpu_benchmark(adev, amdgpu_benchmarking);
3670                else
3671                        DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
3672        }
3673
3674        /*
3675         * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3676         * Otherwise the mgpu fan boost feature will be skipped because the
3677         * gpu instance count would come up short.
3678         */
3679        amdgpu_register_gpu_instance(adev);
3680
3681        /* enable clockgating, etc. after ib tests, etc. since some blocks require
3682         * explicit gating rather than handling it automatically.
3683         */
3684        if (!adev->gmc.xgmi.pending_reset) {
3685                r = amdgpu_device_ip_late_init(adev);
3686                if (r) {
3687                        dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3688                        amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
3689                        goto release_ras_con;
3690                }
3691                /* must succeed. */
3692                amdgpu_ras_resume(adev);
3693                queue_delayed_work(system_wq, &adev->delayed_init_work,
3694                                   msecs_to_jiffies(AMDGPU_RESUME_MS));
3695        }
3696
3697        if (amdgpu_sriov_vf(adev))
3698                flush_delayed_work(&adev->delayed_init_work);
3699
3700        r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
3701        if (r)
3702                dev_err(adev->dev, "Could not create amdgpu device attr\n");
3703
3704        if (IS_ENABLED(CONFIG_PERF_EVENTS)) {
3705                r = amdgpu_pmu_init(adev);
3706                if (r)
3707                        dev_err(adev->dev, "amdgpu_pmu_init failed\n");
            }
3708
3709        /* Keep stored PCI config space at hand for restore on sudden PCI error */
3710        if (amdgpu_device_cache_pci_state(adev->pdev))
3711                pci_restore_state(pdev);
3712
3713        /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
3714        /* this will fail for cards that aren't VGA class devices, just
3715         * ignore it */
3716        if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
3717                vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
3718
3719        if (amdgpu_device_supports_px(ddev)) {
3720                px = true;
3721                vga_switcheroo_register_client(adev->pdev,
3722                                               &amdgpu_switcheroo_ops, px);
3723                vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3724        }
3725
3726        if (adev->gmc.xgmi.pending_reset)
3727                queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
3728                                   msecs_to_jiffies(AMDGPU_RESUME_MS));
3729
3730        return 0;
3731
3732release_ras_con:
3733        amdgpu_release_ras_context(adev);
3734
3735failed:
3736        amdgpu_vf_error_trans_all(adev);
3737
3738        return r;
3739}
3740
3741static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
3742{
3743        /* Clear all CPU mappings pointing to this device */
3744        unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
3745
3746        /* Unmap all mapped bars - Doorbell, registers and VRAM */
3747        amdgpu_device_doorbell_fini(adev);
3748
3749        iounmap(adev->rmmio);
3750        adev->rmmio = NULL;
3751        if (adev->mman.aper_base_kaddr)
3752                iounmap(adev->mman.aper_base_kaddr);
3753        adev->mman.aper_base_kaddr = NULL;
3754
3755        /* Memory manager related */
3756        if (!adev->gmc.xgmi.connected_to_cpu) {
3757                arch_phys_wc_del(adev->gmc.vram_mtrr);
3758                arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
3759        }
3760}
3761
3762/**
3763 * amdgpu_device_fini_hw - tear down the driver's hardware state
3764 *
3765 * @adev: amdgpu_device pointer
3766 *
3767 * Tear down the driver info (all asics).
3768 * Called at driver shutdown.
3769 */
3770void amdgpu_device_fini_hw(struct amdgpu_device *adev)
3771{
3772        dev_info(adev->dev, "amdgpu: finishing device.\n");
3773        flush_delayed_work(&adev->delayed_init_work);
3774        ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
3775        adev->shutdown = true;
3776
3777        /* make sure IB tests have finished before entering exclusive mode
3778         * to avoid preemption on an IB test
3779         */
3780        if (amdgpu_sriov_vf(adev)) {
3781                amdgpu_virt_request_full_gpu(adev, false);
3782                amdgpu_virt_fini_data_exchange(adev);
3783        }
3784
3785        /* disable all interrupts */
3786        amdgpu_irq_disable_all(adev);
3787        if (adev->mode_info.mode_config_initialized) {
3788                if (!amdgpu_device_has_dc_support(adev))
3789                        drm_helper_force_disable_all(adev_to_drm(adev));
3790                else
3791                        drm_atomic_helper_shutdown(adev_to_drm(adev));
3792        }
3793        amdgpu_fence_driver_fini_hw(adev);
3794
3795        if (adev->pm_sysfs_en)
3796                amdgpu_pm_sysfs_fini(adev);
3797        if (adev->ucode_sysfs_en)
3798                amdgpu_ucode_sysfs_fini(adev);
3799        sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
3800
3801        amdgpu_fbdev_fini(adev);
3802
3803        amdgpu_irq_fini_hw(adev);
3804
3805        amdgpu_device_ip_fini_early(adev);
3806
3807        amdgpu_gart_dummy_page_fini(adev);
3808
3809        amdgpu_device_unmap_mmio(adev);
3810}
3811
3812void amdgpu_device_fini_sw(struct amdgpu_device *adev)
3813{
3814        amdgpu_device_ip_fini(adev);
3815        amdgpu_fence_driver_fini_sw(adev);
3816        release_firmware(adev->firmware.gpu_info_fw);
3817        adev->firmware.gpu_info_fw = NULL;
3818        adev->accel_working = false;
3819
3820        amdgpu_reset_fini(adev);
3821
3822        /* free i2c buses */
3823        if (!amdgpu_device_has_dc_support(adev))
3824                amdgpu_i2c_fini(adev);
3825
3826        if (amdgpu_emu_mode != 1)
3827                amdgpu_atombios_fini(adev);
3828
3829        kfree(adev->bios);
3830        adev->bios = NULL;
3831        if (amdgpu_device_supports_px(adev_to_drm(adev))) {
3832                vga_switcheroo_unregister_client(adev->pdev);
3833                vga_switcheroo_fini_domain_pm_ops(adev->dev);
3834        }
3835        if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
3836                vga_client_register(adev->pdev, NULL, NULL, NULL);
3837
3838        if (IS_ENABLED(CONFIG_PERF_EVENTS))
3839                amdgpu_pmu_fini(adev);
3840        if (adev->mman.discovery_bin)
3841                amdgpu_discovery_fini(adev);
3842
3843        kfree(adev->pci_state);
3845}
3846
3847
3848/*
3849 * Suspend & resume.
3850 */
3851/**
3852 * amdgpu_device_suspend - initiate device suspend
3853 *
3854 * @dev: drm dev pointer
3855 * @fbcon: notify the fbdev of suspend
3856 *
3857 * Puts the hw in the suspend state (all asics).
3858 * Returns 0 for success or an error on failure.
3859 * Called at driver suspend.
3860 */
3861int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
3862{
3863        struct amdgpu_device *adev = drm_to_adev(dev);
3864
3865        if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3866                return 0;
3867
3868        adev->in_suspend = true;
3869
3870        if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
3871                DRM_WARN("smart shift update failed\n");
3872
3873        drm_kms_helper_poll_disable(dev);
3874
3875        if (fbcon)
3876                amdgpu_fbdev_set_suspend(adev, 1);
3877
3878        cancel_delayed_work_sync(&adev->delayed_init_work);
3879
3880        amdgpu_ras_suspend(adev);
3881
3882        amdgpu_device_ip_suspend_phase1(adev);
3883
3884        if (!adev->in_s0ix)
3885                amdgpu_amdkfd_suspend(adev, adev->in_runpm);
3886
3887        /* evict vram memory */
3888        amdgpu_bo_evict_vram(adev);
3889
3890        amdgpu_fence_driver_suspend(adev);
3891
3892        amdgpu_device_ip_suspend_phase2(adev);
3893        /* evict remaining vram memory
3894         * This second call to evict vram is to evict the gart page table
3895         * using the CPU.
3896         */
3897        amdgpu_bo_evict_vram(adev);
3898
3899        return 0;
3900}
3901
3902/**
3903 * amdgpu_device_resume - initiate device resume
3904 *
3905 * @dev: drm dev pointer
3906 * @fbcon: notify the fbdev of resume
3907 *
3908 * Bring the hw back to operating state (all asics).
3909 * Returns 0 for success or an error on failure.
3910 * Called at driver resume.
3911 */
3912int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
3913{
3914        struct amdgpu_device *adev = drm_to_adev(dev);
3915        int r = 0;
3916
3917        if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3918                return 0;
3919
3920        if (adev->in_s0ix)
3921                amdgpu_gfx_state_change_set(adev, sGpuChangeState_D0Entry);
3922
3923        /* post card */
3924        if (amdgpu_device_need_post(adev)) {
3925                r = amdgpu_device_asic_init(adev);
3926                if (r)
3927                        dev_err(adev->dev, "amdgpu asic init failed\n");
3928        }
3929
3930        r = amdgpu_device_ip_resume(adev);
3931        if (r) {
3932                dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
3933                return r;
3934        }
3935        amdgpu_fence_driver_resume(adev);
3936
3938        r = amdgpu_device_ip_late_init(adev);
3939        if (r)
3940                return r;
3941
3942        queue_delayed_work(system_wq, &adev->delayed_init_work,
3943                           msecs_to_jiffies(AMDGPU_RESUME_MS));
3944
3945        if (!adev->in_s0ix) {
3946                r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
3947                if (r)
3948                        return r;
3949        }
3950
3951        /* Make sure IB tests flushed */
3952        flush_delayed_work(&adev->delayed_init_work);
3953
3954        if (fbcon)
3955                amdgpu_fbdev_set_suspend(adev, 0);
3956
3957        drm_kms_helper_poll_enable(dev);
3958
3959        amdgpu_ras_resume(adev);
3960
3961        /*
3962         * Most of the connector probing functions try to acquire runtime pm
3963         * refs to ensure that the GPU is powered on when connector polling is
3964         * performed. Since we're calling this from a runtime PM callback,
3965         * trying to acquire rpm refs will cause us to deadlock.
3966         *
3967         * Since we're guaranteed to be holding the rpm lock, it's safe to
3968         * temporarily disable the rpm helpers so this doesn't deadlock us.
3969         */
3970#ifdef CONFIG_PM
3971        dev->dev->power.disable_depth++;
3972#endif
3973        if (!amdgpu_device_has_dc_support(adev))
3974                drm_helper_hpd_irq_event(dev);
3975        else
3976                drm_kms_helper_hotplug_event(dev);
3977#ifdef CONFIG_PM
3978        dev->dev->power.disable_depth--;
3979#endif
3980        adev->in_suspend = false;
3981
3982        if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
3983                DRM_WARN("smart shift update failed\n");
3984
3985        return 0;
3986}
3987
3988/**
3989 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3990 *
3991 * @adev: amdgpu_device pointer
3992 *
3993 * The list of all the hardware IPs that make up the asic is walked and
3994 * the check_soft_reset callbacks are run.  check_soft_reset determines
3995 * if the asic is still hung or not.
3996 * Returns true if any of the IPs are still in a hung state, false if not.
3997 */
3998static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
3999{
4000        int i;
4001        bool asic_hang = false;
4002
4003        if (amdgpu_sriov_vf(adev))
4004                return true;
4005
4006        if (amdgpu_asic_need_full_reset(adev))
4007                return true;
4008
4009        for (i = 0; i < adev->num_ip_blocks; i++) {
4010                if (!adev->ip_blocks[i].status.valid)
4011                        continue;
4012                if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4013                        adev->ip_blocks[i].status.hang =
4014                                adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4015                if (adev->ip_blocks[i].status.hang) {
4016                        dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
4017                        asic_hang = true;
4018                }
4019        }
4020        return asic_hang;
4021}
4022
4023/**
4024 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4025 *
4026 * @adev: amdgpu_device pointer
4027 *
4028 * The list of all the hardware IPs that make up the asic is walked and the
4029 * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
4030 * handles any IP specific hardware or software state changes that are
4031 * necessary for a soft reset to succeed.
4032 * Returns 0 on success, negative error code on failure.
4033 */
4034static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
4035{
4036        int i, r = 0;
4037
4038        for (i = 0; i < adev->num_ip_blocks; i++) {
4039                if (!adev->ip_blocks[i].status.valid)
4040                        continue;
4041                if (adev->ip_blocks[i].status.hang &&
4042                    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4043                        r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
4044                        if (r)
4045                                return r;
4046                }
4047        }
4048
4049        return 0;
4050}
4051
4052/**
4053 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4054 *
4055 * @adev: amdgpu_device pointer
4056 *
4057 * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
4058 * reset is necessary to recover.
4059 * Returns true if a full asic reset is required, false if not.
4060 */
4061static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
4062{
4063        int i;
4064
4065        if (amdgpu_asic_need_full_reset(adev))
4066                return true;
4067
4068        for (i = 0; i < adev->num_ip_blocks; i++) {
4069                if (!adev->ip_blocks[i].status.valid)
4070                        continue;
4071                if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4072                    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4073                    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
4074                    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4075                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
4076                        if (adev->ip_blocks[i].status.hang) {
4077                                dev_info(adev->dev, "Some blocks need a full reset!\n");
4078                                return true;
4079                        }
4080                }
4081        }
4082        return false;
4083}
4084
4085/**
4086 * amdgpu_device_ip_soft_reset - do a soft reset
4087 *
4088 * @adev: amdgpu_device pointer
4089 *
4090 * The list of all the hardware IPs that make up the asic is walked and the
4091 * soft_reset callbacks are run if the block is hung.  soft_reset handles any
4092 * IP specific hardware or software state changes that are necessary to soft
4093 * reset the IP.
4094 * Returns 0 on success, negative error code on failure.
4095 */
4096static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
4097{
4098        int i, r = 0;
4099
4100        for (i = 0; i < adev->num_ip_blocks; i++) {
4101                if (!adev->ip_blocks[i].status.valid)
4102                        continue;
4103                if (adev->ip_blocks[i].status.hang &&
4104                    adev->ip_blocks[i].version->funcs->soft_reset) {
4105                        r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
4106                        if (r)
4107                                return r;
4108                }
4109        }
4110
4111        return 0;
4112}
4113
4114/**
4115 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4116 *
4117 * @adev: amdgpu_device pointer
4118 *
4119 * The list of all the hardware IPs that make up the asic is walked and the
4120 * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
4121 * handles any IP specific hardware or software state changes that are
4122 * necessary after the IP has been soft reset.
4123 * Returns 0 on success, negative error code on failure.
4124 */
4125static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
4126{
4127        int i, r = 0;
4128
4129        for (i = 0; i < adev->num_ip_blocks; i++) {
4130                if (!adev->ip_blocks[i].status.valid)
4131                        continue;
4132                if (adev->ip_blocks[i].status.hang &&
4133                    adev->ip_blocks[i].version->funcs->post_soft_reset)
4134                        r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
4135                if (r)
4136                        return r;
4137        }
4138
4139        return 0;
4140}
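    /*
     * Illustrative sketch only (not driver code): the four helpers above
     * are meant to be driven in this order by the reset logic; the caller
     * example_try_soft_reset() is hypothetical.
     */
    #if 0
    static int example_try_soft_reset(struct amdgpu_device *adev)
    {
            int r;

            if (!amdgpu_device_ip_check_soft_reset(adev))
                    return 0;               /* nothing is hung */

            if (amdgpu_device_ip_need_full_reset(adev))
                    return -EAGAIN;         /* soft reset will not suffice */

            r = amdgpu_device_ip_pre_soft_reset(adev);
            if (!r)
                    r = amdgpu_device_ip_soft_reset(adev);
            if (!r)
                    r = amdgpu_device_ip_post_soft_reset(adev);

            return r;
    }
    #endif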
4141
4142/**
4143 * amdgpu_device_recover_vram - Recover some VRAM contents
4144 *
4145 * @adev: amdgpu_device pointer
4146 *
4147 * Restores the contents of VRAM buffers from the shadows in GTT.  Used to
4148 * restore things like GPUVM page tables after a GPU reset where
4149 * the contents of VRAM might be lost.
4150 *
4151 * Returns:
4152 * 0 on success, negative error code on failure.
4153 */
4154static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
4155{
4156        struct dma_fence *fence = NULL, *next = NULL;
4157        struct amdgpu_bo *shadow;
4158        struct amdgpu_bo_vm *vmbo;
4159        long r = 1, tmo;
4160
4161        if (amdgpu_sriov_runtime(adev))
4162                tmo = msecs_to_jiffies(8000);
4163        else
4164                tmo = msecs_to_jiffies(100);
4165
4166        dev_info(adev->dev, "recover vram bo from shadow start\n");
4167        mutex_lock(&adev->shadow_list_lock);
4168        list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4169                shadow = &vmbo->bo;
4170                /* No need to recover an evicted BO */
4171                if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4172                    shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4173                    sha