linux/drivers/gpu/drm/i915/gt/intel_mocs.c
<<
>>
Prefs
   1// SPDX-License-Identifier: MIT
   2/*
   3 * Copyright © 2015 Intel Corporation
   4 */
   5
   6#include "i915_drv.h"
   7
   8#include "intel_engine.h"
   9#include "intel_gt.h"
  10#include "intel_lrc_reg.h"
  11#include "intel_mocs.h"
  12#include "intel_ring.h"
  13
  14/* structures required */
/*
 * One slot of a per-platform MOCS table.
 *
 * Slots are normally created with MOCS_ENTRY(), which also marks them as
 * used; slots that a table leaves out are zero-filled and therefore have
 * used == 0.
 */
struct drm_i915_mocs_entry {
        /* Value written to this slot's MOCS control register. */
        u32 control_value;
        /* 16-bit L3 control value; two slots share one LNCFCMOCS register. */
        u16 l3cc_value;
        /* Non-zero when the slot was explicitly defined by the table. */
        u16 used;
};
  20
/*
 * Description of the MOCS table selected for a device, as filled in by
 * get_mocs_settings().
 */
struct drm_i915_mocs_table {
        /* Number of elements in @table (ARRAY_SIZE of the platform table). */
        unsigned int size;
        /*
         * Number of MOCS slots to program. May exceed @size; the
         * get_entry_*() helpers substitute the I915_MOCS_PTE slot for
         * anything past @size or not marked used.
         */
        unsigned int n_entries;
        /* The platform's static table of entries. */
        const struct drm_i915_mocs_entry *table;
};
  26
  27/* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */
  28#define _LE_CACHEABILITY(value) ((value) << 0)
  29#define _LE_TGT_CACHE(value)    ((value) << 2)
  30#define LE_LRUM(value)          ((value) << 4)
  31#define LE_AOM(value)           ((value) << 6)
  32#define LE_RSC(value)           ((value) << 7)
  33#define LE_SCC(value)           ((value) << 8)
  34#define LE_PFM(value)           ((value) << 11)
  35#define LE_SCF(value)           ((value) << 14)
  36#define LE_COS(value)           ((value) << 15)
  37#define LE_SSE(value)           ((value) << 17)
  38
/* Defines for the tables (LNCFCMOCS0 - LNCFCMOCS31) - two entries per word */
  40#define L3_ESC(value)           ((value) << 0)
  41#define L3_SCC(value)           ((value) << 1)
  42#define _L3_CACHEABILITY(value) ((value) << 4)
  43
  44/* Helper defines */
#define GEN9_NUM_MOCS_ENTRIES   64  /* indices 62-63 are reserved, but configured. */
  46
  47/* (e)LLC caching options */
  48/*
  49 * Note: LE_0_PAGETABLE works only up to Gen11; for newer gens it means
  50 * the same as LE_UC
  51 */
  52#define LE_0_PAGETABLE          _LE_CACHEABILITY(0)
  53#define LE_1_UC                 _LE_CACHEABILITY(1)
  54#define LE_2_WT                 _LE_CACHEABILITY(2)
  55#define LE_3_WB                 _LE_CACHEABILITY(3)
  56
  57/* Target cache */
  58#define LE_TC_0_PAGETABLE       _LE_TGT_CACHE(0)
  59#define LE_TC_1_LLC             _LE_TGT_CACHE(1)
  60#define LE_TC_2_LLC_ELLC        _LE_TGT_CACHE(2)
  61#define LE_TC_3_LLC_ELLC_ALT    _LE_TGT_CACHE(3)
  62
  63/* L3 caching options */
  64#define L3_0_DIRECT             _L3_CACHEABILITY(0)
  65#define L3_1_UC                 _L3_CACHEABILITY(1)
  66#define L3_2_RESERVED           _L3_CACHEABILITY(2)
  67#define L3_3_WB                 _L3_CACHEABILITY(3)
  68
/*
 * Designated initializer for slot @__idx of a MOCS table: stores the control
 * and L3CC values and marks the slot as used, so that the get_entry_*()
 * helpers return its values instead of falling back to I915_MOCS_PTE.
 */
#define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \
        [__idx] = { \
                .control_value = __control_value, \
                .l3cc_value = __l3cc_value, \
                .used = 1, \
        }
  75
  76/*
  77 * MOCS tables
  78 *
  79 * These are the MOCS tables that are programmed across all the rings.
  80 * The control value is programmed to all the rings that support the
 * MOCS registers, while the l3cc_values are only programmed to the
 * LNCFCMOCS0 - LNCFCMOCS31 registers.
  83 *
  84 * These tables are intended to be kept reasonably consistent across
  85 * HW platforms, and for ICL+, be identical across OSes. To achieve
  86 * that, for Icelake and above, list of entries is published as part
  87 * of bspec.
  88 *
  89 * Entries not part of the following tables are undefined as far as
  90 * userspace is concerned and shouldn't be relied upon.  For Gen < 12
  91 * they will be initialized to PTE. Gen >= 12 onwards don't have a setting for
  92 * PTE and will be initialized to an invalid value.
  93 *
  94 * The last few entries are reserved by the hardware. For ICL+ they
  95 * should be initialized according to bspec and never used, for older
  96 * platforms they should never be written to.
  97 *
  98 * NOTE: These tables are part of bspec and defined as part of hardware
  99 *       interface for ICL+. For older platforms, they are part of kernel
 100 *       ABI. It is expected that, for specific hardware platform, existing
 101 *       entries will remain constant and the table will only be updated by
 102 *       adding new entries, filling unused positions.
 103 */
/*
 * Slots common to the Gen9 tables below: I915_MOCS_UNCACHED and
 * I915_MOCS_PTE. Each Gen9 table adds its own I915_MOCS_CACHED slot.
 */
#define GEN9_MOCS_ENTRIES \
        MOCS_ENTRY(I915_MOCS_UNCACHED, \
                   LE_1_UC | LE_TC_2_LLC_ELLC, \
                   L3_1_UC), \
        MOCS_ENTRY(I915_MOCS_PTE, \
                   LE_0_PAGETABLE | LE_TC_0_PAGETABLE | LE_LRUM(3), \
                   L3_3_WB)
 111
/*
 * Table chosen by get_mocs_settings() for IS_GEN9_BC() and Cannonlake
 * devices.
 */
static const struct drm_i915_mocs_entry skl_mocs_table[] = {
        GEN9_MOCS_ENTRIES,
        MOCS_ENTRY(I915_MOCS_CACHED,
                   LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3),
                   L3_3_WB),

        /*
         * mocs:63
         * - used by the L3 for all of its evictions.
         *   Thus it is expected to allow LLC cacheability to enable coherent
         *   flows to be maintained.
         * - used to force L3 uncacheable cycles.
         *   Thus it is expected to make the surface L3 uncacheable.
         */
        MOCS_ENTRY(63,
                   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
                   L3_1_UC)
};
 130
/*
 * Table chosen by get_mocs_settings() for IS_GEN9_LP() devices.
 *
 * NOTE: the LE_TGT_CACHE is not used on Broxton. The I915_MOCS_CACHED slot
 * here is LE uncached (LE_1_UC) and only write-back in L3.
 */
static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
        GEN9_MOCS_ENTRIES,
        MOCS_ENTRY(I915_MOCS_CACHED,
                   LE_1_UC | LE_TC_2_LLC_ELLC | LE_LRUM(3),
                   L3_3_WB)
};
 138
 139#define GEN11_MOCS_ENTRIES \
 140        /* Entries 0 and 1 are defined per-platform */ \
 141        /* Base - L3 + LLC */ \
 142        MOCS_ENTRY(2, \
 143                   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
 144                   L3_3_WB), \
 145        /* Base - Uncached */ \
 146        MOCS_ENTRY(3, \
 147                   LE_1_UC | LE_TC_1_LLC, \
 148                   L3_1_UC), \
 149        /* Base - L3 */ \
 150        MOCS_ENTRY(4, \
 151                   LE_1_UC | LE_TC_1_LLC, \
 152                   L3_3_WB), \
 153        /* Base - LLC */ \
 154        MOCS_ENTRY(5, \
 155                   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
 156                   L3_1_UC), \
 157        /* Age 0 - LLC */ \
 158        MOCS_ENTRY(6, \
 159                   LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
 160                   L3_1_UC), \
 161        /* Age 0 - L3 + LLC */ \
 162        MOCS_ENTRY(7, \
 163                   LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
 164                   L3_3_WB), \
 165        /* Age: Don't Chg. - LLC */ \
 166        MOCS_ENTRY(8, \
 167                   LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
 168                   L3_1_UC), \
 169        /* Age: Don't Chg. - L3 + LLC */ \
 170        MOCS_ENTRY(9, \
 171                   LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
 172                   L3_3_WB), \
 173        /* No AOM - LLC */ \
 174        MOCS_ENTRY(10, \
 175                   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \
 176                   L3_1_UC), \
 177        /* No AOM - L3 + LLC */ \
 178        MOCS_ENTRY(11, \
 179                   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \
 180                   L3_3_WB), \
 181        /* No AOM; Age 0 - LLC */ \
 182        MOCS_ENTRY(12, \
 183                   LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \
 184                   L3_1_UC), \
 185        /* No AOM; Age 0 - L3 + LLC */ \
 186        MOCS_ENTRY(13, \
 187                   LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \
 188                   L3_3_WB), \
 189        /* No AOM; Age:DC - LLC */ \
 190        MOCS_ENTRY(14, \
 191                   LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
 192                   L3_1_UC), \
 193        /* No AOM; Age:DC - L3 + LLC */ \
 194        MOCS_ENTRY(15, \
 195                   LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
 196                   L3_3_WB), \
 197        /* Self-Snoop - L3 + LLC */ \
 198        MOCS_ENTRY(18, \
 199                   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \
 200                   L3_3_WB), \
 201        /* Skip Caching - L3 + LLC(12.5%) */ \
 202        MOCS_ENTRY(19, \
 203                   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(7), \
 204                   L3_3_WB), \
 205        /* Skip Caching - L3 + LLC(25%) */ \
 206        MOCS_ENTRY(20, \
 207                   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(3), \
 208                   L3_3_WB), \
 209        /* Skip Caching - L3 + LLC(50%) */ \
 210        MOCS_ENTRY(21, \
 211                   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(1), \
 212                   L3_3_WB), \
 213        /* Skip Caching - L3 + LLC(75%) */ \
 214        MOCS_ENTRY(22, \
 215                   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(3), \
 216                   L3_3_WB), \
 217        /* Skip Caching - L3 + LLC(87.5%) */ \
 218        MOCS_ENTRY(23, \
 219                   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(7), \
 220                   L3_3_WB), \
 221        /* HW Reserved - SW program but never use */ \
 222        MOCS_ENTRY(62, \
 223                   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
 224                   L3_1_UC), \
 225        /* HW Reserved - SW program but never use */ \
 226        MOCS_ENTRY(63, \
 227                   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
 228                   L3_1_UC)
 229
 230static const struct drm_i915_mocs_entry tgl_mocs_table[] = {
        /*
         * NOTE:
         * Reserved and unspecified MOCS indices have been set to (L3 + LLC).
         * These reserved entries should never be used; they may be changed
         * to lower-performance variants with better coherency in the future
         * if more entries are needed. We are programming index
         * I915_MOCS_PTE(1) only; __init_mocs_table() takes care of
         * programming the unused indices with this entry.
         */
 240        MOCS_ENTRY(I915_MOCS_PTE,
 241                   LE_0_PAGETABLE | LE_TC_0_PAGETABLE,
 242                   L3_1_UC),
 243        GEN11_MOCS_ENTRIES,
 244
 245        /* Implicitly enable L1 - HDC:L1 + L3 + LLC */
 246        MOCS_ENTRY(48,
 247                   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
 248                   L3_3_WB),
 249        /* Implicitly enable L1 - HDC:L1 + L3 */
 250        MOCS_ENTRY(49,
 251                   LE_1_UC | LE_TC_1_LLC,
 252                   L3_3_WB),
 253        /* Implicitly enable L1 - HDC:L1 + LLC */
 254        MOCS_ENTRY(50,
 255                   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
 256                   L3_1_UC),
 257        /* Implicitly enable L1 - HDC:L1 */
 258        MOCS_ENTRY(51,
 259                   LE_1_UC | LE_TC_1_LLC,
 260                   L3_1_UC),
 261        /* HW Special Case (CCS) */
 262        MOCS_ENTRY(60,
 263                   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
 264                   L3_1_UC),
 265        /* HW Special Case (Displayable) */
 266        MOCS_ENTRY(61,
 267                   LE_1_UC | LE_TC_1_LLC,
 268                   L3_3_WB),
 269};
 270
/*
 * Table chosen by get_mocs_settings() when GRAPHICS_VER() == 11: the two
 * platform-specific slots 0 and 1 followed by the shared GEN11 entries.
 */
static const struct drm_i915_mocs_entry icl_mocs_table[] = {
        /* Base - Uncached (Deprecated) */
        MOCS_ENTRY(I915_MOCS_UNCACHED,
                   LE_1_UC | LE_TC_1_LLC,
                   L3_1_UC),
        /* Base - L3 + LeCC:PAT (Deprecated) */
        MOCS_ENTRY(I915_MOCS_PTE,
                   LE_0_PAGETABLE | LE_TC_0_PAGETABLE,
                   L3_3_WB),

        GEN11_MOCS_ENTRIES
};
 283
/*
 * Table chosen by get_mocs_settings() for DG1.
 *
 * Every control_value is 0; only the l3cc_value differs between slots.
 * NOTE(review): presumably DG1 satisfies IS_DGFX(), in which case
 * get_mocs_settings() sets neither HAS_GLOBAL_MOCS nor HAS_ENGINE_MOCS and
 * the control values are never written - confirm.
 */
static const struct drm_i915_mocs_entry dg1_mocs_table[] = {
        /* Error */
        MOCS_ENTRY(0, 0, L3_0_DIRECT),

        /* UC */
        MOCS_ENTRY(1, 0, L3_1_UC),

        /* Reserved */
        MOCS_ENTRY(2, 0, L3_0_DIRECT),
        MOCS_ENTRY(3, 0, L3_0_DIRECT),
        MOCS_ENTRY(4, 0, L3_0_DIRECT),

        /* WB - L3 */
        MOCS_ENTRY(5, 0, L3_3_WB),
        /* WB - L3 50% */
        MOCS_ENTRY(6, 0, L3_ESC(1) | L3_SCC(1) | L3_3_WB),
        /* WB - L3 25% */
        MOCS_ENTRY(7, 0, L3_ESC(1) | L3_SCC(3) | L3_3_WB),
        /* WB - L3 12.5% */
        MOCS_ENTRY(8, 0, L3_ESC(1) | L3_SCC(7) | L3_3_WB),

        /* HDC:L1 + L3 */
        MOCS_ENTRY(48, 0, L3_3_WB),
        /* HDC:L1 */
        MOCS_ENTRY(49, 0, L3_1_UC),

        /* HW Reserved */
        MOCS_ENTRY(60, 0, L3_1_UC),
        MOCS_ENTRY(61, 0, L3_1_UC),
        MOCS_ENTRY(62, 0, L3_1_UC),
        MOCS_ENTRY(63, 0, L3_1_UC),
};
 316
/*
 * Flag bits returned by get_mocs_settings(), telling the init paths which
 * register banks to program. A return value of 0 means "program nothing".
 */
enum {
        /* Control values go to the global MOCS registers (intel_mocs_init()). */
        HAS_GLOBAL_MOCS = BIT(0),
        /* Control values go to each engine's own MOCS registers. */
        HAS_ENGINE_MOCS = BIT(1),
        /* L3CC values are programmed when initialising a render-class engine. */
        HAS_RENDER_L3CC = BIT(2),
};
 322
 323static bool has_l3cc(const struct drm_i915_private *i915)
 324{
 325        return true;
 326}
 327
 328static bool has_global_mocs(const struct drm_i915_private *i915)
 329{
 330        return HAS_GLOBAL_MOCS_REGISTERS(i915);
 331}
 332
 333static bool has_mocs(const struct drm_i915_private *i915)
 334{
 335        return !IS_DGFX(i915);
 336}
 337
 338static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
 339                                      struct drm_i915_mocs_table *table)
 340{
 341        unsigned int flags;
 342
 343        if (IS_DG1(i915)) {
 344                table->size = ARRAY_SIZE(dg1_mocs_table);
 345                table->table = dg1_mocs_table;
 346                table->n_entries = GEN9_NUM_MOCS_ENTRIES;
 347        } else if (GRAPHICS_VER(i915) >= 12) {
 348                table->size  = ARRAY_SIZE(tgl_mocs_table);
 349                table->table = tgl_mocs_table;
 350                table->n_entries = GEN9_NUM_MOCS_ENTRIES;
 351        } else if (GRAPHICS_VER(i915) == 11) {
 352                table->size  = ARRAY_SIZE(icl_mocs_table);
 353                table->table = icl_mocs_table;
 354                table->n_entries = GEN9_NUM_MOCS_ENTRIES;
 355        } else if (IS_GEN9_BC(i915) || IS_CANNONLAKE(i915)) {
 356                table->size  = ARRAY_SIZE(skl_mocs_table);
 357                table->n_entries = GEN9_NUM_MOCS_ENTRIES;
 358                table->table = skl_mocs_table;
 359        } else if (IS_GEN9_LP(i915)) {
 360                table->size  = ARRAY_SIZE(broxton_mocs_table);
 361                table->n_entries = GEN9_NUM_MOCS_ENTRIES;
 362                table->table = broxton_mocs_table;
 363        } else {
 364                drm_WARN_ONCE(&i915->drm, GRAPHICS_VER(i915) >= 9,
 365                              "Platform that should have a MOCS table does not.\n");
 366                return 0;
 367        }
 368
 369        if (GEM_DEBUG_WARN_ON(table->size > table->n_entries))
 370                return 0;
 371
 372        /* WaDisableSkipCaching:skl,bxt,kbl,glk */
 373        if (GRAPHICS_VER(i915) == 9) {
 374                int i;
 375
 376                for (i = 0; i < table->size; i++)
 377                        if (GEM_DEBUG_WARN_ON(table->table[i].l3cc_value &
 378                                              (L3_ESC(1) | L3_SCC(0x7))))
 379                                return 0;
 380        }
 381
 382        flags = 0;
 383        if (has_mocs(i915)) {
 384                if (has_global_mocs(i915))
 385                        flags |= HAS_GLOBAL_MOCS;
 386                else
 387                        flags |= HAS_ENGINE_MOCS;
 388        }
 389        if (has_l3cc(i915))
 390                flags |= HAS_RENDER_L3CC;
 391
 392        return flags;
 393}
 394
 395/*
 396 * Get control_value from MOCS entry taking into account when it's not used:
 397 * I915_MOCS_PTE's value is returned in this case.
 398 */
 399static u32 get_entry_control(const struct drm_i915_mocs_table *table,
 400                             unsigned int index)
 401{
 402        if (index < table->size && table->table[index].used)
 403                return table->table[index].control_value;
 404
 405        return table->table[I915_MOCS_PTE].control_value;
 406}
 407
 408#define for_each_mocs(mocs, t, i) \
 409        for (i = 0; \
 410             i < (t)->n_entries ? (mocs = get_entry_control((t), i)), 1 : 0;\
 411             i++)
 412
 413static void __init_mocs_table(struct intel_uncore *uncore,
 414                              const struct drm_i915_mocs_table *table,
 415                              u32 addr)
 416{
 417        unsigned int i;
 418        u32 mocs;
 419
 420        for_each_mocs(mocs, table, i)
 421                intel_uncore_write_fw(uncore, _MMIO(addr + i * 4), mocs);
 422}
 423
 424static u32 mocs_offset(const struct intel_engine_cs *engine)
 425{
 426        static const u32 offset[] = {
 427                [RCS0]  =  __GEN9_RCS0_MOCS0,
 428                [VCS0]  =  __GEN9_VCS0_MOCS0,
 429                [VCS1]  =  __GEN9_VCS1_MOCS0,
 430                [VECS0] =  __GEN9_VECS0_MOCS0,
 431                [BCS0]  =  __GEN9_BCS0_MOCS0,
 432                [VCS2]  = __GEN11_VCS2_MOCS0,
 433        };
 434
 435        GEM_BUG_ON(engine->id >= ARRAY_SIZE(offset));
 436        return offset[engine->id];
 437}
 438
 439static void init_mocs_table(struct intel_engine_cs *engine,
 440                            const struct drm_i915_mocs_table *table)
 441{
 442        __init_mocs_table(engine->uncore, table, mocs_offset(engine));
 443}
 444
 445/*
 446 * Get l3cc_value from MOCS entry taking into account when it's not used:
 447 * I915_MOCS_PTE's value is returned in this case.
 448 */
 449static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table,
 450                          unsigned int index)
 451{
 452        if (index < table->size && table->table[index].used)
 453                return table->table[index].l3cc_value;
 454
 455        return table->table[I915_MOCS_PTE].l3cc_value;
 456}
 457
 458static u32 l3cc_combine(u16 low, u16 high)
 459{
 460        return low | (u32)high << 16;
 461}
 462
 463#define for_each_l3cc(l3cc, t, i) \
 464        for (i = 0; \
 465             i < ((t)->n_entries + 1) / 2 ? \
 466             (l3cc = l3cc_combine(get_entry_l3cc((t), 2 * i), \
 467                                  get_entry_l3cc((t), 2 * i + 1))), 1 : \
 468             0; \
 469             i++)
 470
 471static void init_l3cc_table(struct intel_engine_cs *engine,
 472                            const struct drm_i915_mocs_table *table)
 473{
 474        struct intel_uncore *uncore = engine->uncore;
 475        unsigned int i;
 476        u32 l3cc;
 477
 478        for_each_l3cc(l3cc, table, i)
 479                intel_uncore_write_fw(uncore, GEN9_LNCFCMOCS(i), l3cc);
 480}
 481
 482void intel_mocs_init_engine(struct intel_engine_cs *engine)
 483{
 484        struct drm_i915_mocs_table table;
 485        unsigned int flags;
 486
 487        /* Called under a blanket forcewake */
 488        assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
 489
 490        flags = get_mocs_settings(engine->i915, &table);
 491        if (!flags)
 492                return;
 493
 494        /* Platforms with global MOCS do not need per-engine initialization. */
 495        if (flags & HAS_ENGINE_MOCS)
 496                init_mocs_table(engine, &table);
 497
 498        if (flags & HAS_RENDER_L3CC && engine->class == RENDER_CLASS)
 499                init_l3cc_table(engine, &table);
 500}
 501
 502static u32 global_mocs_offset(void)
 503{
 504        return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0));
 505}
 506
 507void intel_mocs_init(struct intel_gt *gt)
 508{
 509        struct drm_i915_mocs_table table;
 510        unsigned int flags;
 511
 512        /*
 513         * LLC and eDRAM control values are not applicable to dgfx
 514         */
 515        flags = get_mocs_settings(gt->i915, &table);
 516        if (flags & HAS_GLOBAL_MOCS)
 517                __init_mocs_table(gt->uncore, &table, global_mocs_offset());
 518}
 519
 520#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 521#include "selftest_mocs.c"
 522#endif
 523