linux/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <linux/sort.h>

#include "gt/intel_gt.h"
#include "gt/intel_engine_user.h"

#include "i915_selftest.h"

#include "gem/i915_gem_context.h"
#include "selftests/igt_flush_test.h"
#include "selftests/i915_random.h"
#include "selftests/mock_drm.h"
#include "huge_gem_object.h"
#include "mock_context.h"
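
/* Comparator for sort() so we can order an array of ktime_t samples. */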
static int wrap_ktime_compare(const void *A, const void *B)
{
	const ktime_t *a = A, *b = B;

	return ktime_compare(*a, *b);
}
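
/*
 * Time a blitter fill of @obj on each user-visible copy engine: take
 * five samples per engine and report bandwidth from a weighted average
 * of the three middle samples (the fastest and slowest are discarded).
 */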
static int __perf_fill_blt(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	int inst = 0;

	do {
		struct intel_engine_cs *engine;
		ktime_t t[5];
		int pass;
		int err = 0;

		engine = intel_engine_lookup_user(i915,
						  I915_ENGINE_CLASS_COPY,
						  inst++);
		if (!engine)
			return 0;

		intel_engine_pm_get(engine);
		for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
			struct intel_context *ce = engine->kernel_context;
			ktime_t t0, t1;

			t0 = ktime_get();

			err = i915_gem_object_fill_blt(obj, ce, 0);
			if (err)
				break;

			err = i915_gem_object_wait(obj,
						   I915_WAIT_ALL,
						   MAX_SCHEDULE_TIMEOUT);
			if (err)
				break;

			t1 = ktime_get();
			t[pass] = ktime_sub(t1, t0);
		}
		intel_engine_pm_put(engine);
		if (err)
			return err;

		sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
		pr_info("%s: blt %zd KiB fill: %lld MiB/s\n",
			engine->name,
			obj->base.size >> 10,
			div64_u64(mul_u32_u32(4 * obj->base.size,
					      1000 * 1000 * 1000),
				  t[1] + 2 * t[2] + t[3]) >> 20);
	} while (1);
}
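
/* Sweep the fill benchmark over object sizes from 4 KiB up to 64 MiB. */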
static int perf_fill_blt(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *obj;
		int err;

		obj = i915_gem_object_create_internal(i915, sizes[i]);
		if (IS_ERR(obj))
			return PTR_ERR(obj);

		err = __perf_fill_blt(obj);
		i915_gem_object_put(obj);
		if (err)
			return err;
	}

	return 0;
}
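
/*
 * Same measurement loop as __perf_fill_blt(), but timing a blitter copy
 * from @src to @dst instead of a solid fill.
 */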
static int __perf_copy_blt(struct drm_i915_gem_object *src,
			   struct drm_i915_gem_object *dst)
{
	struct drm_i915_private *i915 = to_i915(src->base.dev);
	int inst = 0;

	do {
		struct intel_engine_cs *engine;
		ktime_t t[5];
		int pass;
		int err = 0;

		engine = intel_engine_lookup_user(i915,
						  I915_ENGINE_CLASS_COPY,
						  inst++);
		if (!engine)
			return 0;

		intel_engine_pm_get(engine);
		for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
			struct intel_context *ce = engine->kernel_context;
			ktime_t t0, t1;

			t0 = ktime_get();

			err = i915_gem_object_copy_blt(src, dst, ce);
			if (err)
				break;

			err = i915_gem_object_wait(dst,
						   I915_WAIT_ALL,
						   MAX_SCHEDULE_TIMEOUT);
			if (err)
				break;

			t1 = ktime_get();
			t[pass] = ktime_sub(t1, t0);
		}
		intel_engine_pm_put(engine);
		if (err)
			return err;

		sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
		pr_info("%s: blt %zd KiB copy: %lld MiB/s\n",
			engine->name,
			src->base.size >> 10,
			div64_u64(mul_u32_u32(4 * src->base.size,
					      1000 * 1000 * 1000),
				  t[1] + 2 * t[2] + t[3]) >> 20);
	} while (1);
}
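
/* Sweep the copy benchmark over the same 4 KiB to 64 MiB size range. */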
static int perf_copy_blt(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *src, *dst;
		int err;

		src = i915_gem_object_create_internal(i915, sizes[i]);
		if (IS_ERR(src))
			return PTR_ERR(src);

		dst = i915_gem_object_create_internal(i915, sizes[i]);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto err_src;
		}

		err = __perf_copy_blt(src, dst);

		i915_gem_object_put(dst);
err_src:
		i915_gem_object_put(src);
		if (err)
			return err;
	}

	return 0;
}
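
/* Per-thread parameters handed to each igt/blt kthread. */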
struct igt_thread_arg {
	struct intel_engine_cs *engine;
	struct i915_gem_context *ctx;
	struct file *file;
	struct rnd_state prng;
	unsigned int n_cpus;
};
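
/*
 * Worker thread: fill randomly sized objects with a random value using
 * the blitter, then verify the result through a CPU readback. Object
 * sizes double each iteration (clamped to a share of the address space)
 * until the selftest timeout expires; -ENOMEM from an oversized request
 * is treated as benign.
 */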
static int igt_fill_blt_thread(void *arg)
{
	struct igt_thread_arg *thread = arg;
	struct intel_engine_cs *engine = thread->engine;
	struct rnd_state *prng = &thread->prng;
	struct drm_i915_gem_object *obj;
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	unsigned int prio;
	IGT_TIMEOUT(end);
	u64 total, max;
	int err;

	ctx = thread->ctx;
	if (!ctx) {
		ctx = live_context_for_engine(engine, thread->file);
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);

		prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
		ctx->sched.priority = prio;
	}

	ce = i915_gem_context_get_engine(ctx, 0);
	GEM_BUG_ON(IS_ERR(ce));

	/*
	 * If we have a tiny shared address space, like for the GGTT,
	 * then we can't be too greedy.
	 */
	max = ce->vm->total;
	if (i915_is_ggtt(ce->vm) || thread->ctx)
		max = div_u64(max, thread->n_cpus);
	max >>= 4;

	total = PAGE_SIZE;
	do {
		/* Aim to keep the runtime under reasonable bounds! */
		const u32 max_phys_size = SZ_64K;
		u32 val = prandom_u32_state(prng);
		u32 phys_sz;
		u32 sz;
		u32 *vaddr;
		u32 i;

		total = min(total, max);
		sz = i915_prandom_u32_max_state(total, prng) + 1;
		phys_sz = sz % max_phys_size + 1;

		sz = round_up(sz, PAGE_SIZE);
		phys_sz = round_up(phys_sz, PAGE_SIZE);
		phys_sz = min(phys_sz, sz);

		pr_debug("%s with phys_sz=%x, sz=%x, val=%x\n", __func__,
			 phys_sz, sz, val);

		obj = huge_gem_object(engine->i915, phys_sz, sz);
		if (IS_ERR(obj)) {
			err = PTR_ERR(obj);
			goto err_flush;
		}

		vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto err_put;
		}

		/*
		 * Make sure the potentially async clflush does its job, if
		 * required.
		 */
		memset32(vaddr, val ^ 0xdeadbeaf,
			 huge_gem_object_phys_size(obj) / sizeof(u32));

		if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
			obj->cache_dirty = true;

		err = i915_gem_object_fill_blt(obj, ce, val);
		if (err)
			goto err_unpin;

		err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
		if (err)
			goto err_unpin;

		for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); i += 17) {
			if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
				drm_clflush_virt_range(&vaddr[i], sizeof(vaddr[i]));

			if (vaddr[i] != val) {
				pr_err("vaddr[%u]=%x, expected=%x\n", i,
				       vaddr[i], val);
				err = -EINVAL;
				goto err_unpin;
			}
		}

		i915_gem_object_unpin_map(obj);
		i915_gem_object_put(obj);

		total <<= 1;
	} while (!time_after(jiffies, end));

	goto err_flush;

err_unpin:
	i915_gem_object_unpin_map(obj);
err_put:
	i915_gem_object_put(obj);
err_flush:
	if (err == -ENOMEM)
		err = 0;

	intel_context_put(ce);
	return err;
}
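
/*
 * Worker thread: populate a source object from the CPU, blit it into a
 * freshly scrubbed destination, then verify the destination by CPU
 * readback, with the same size-doubling loop as igt_fill_blt_thread().
 */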
static int igt_copy_blt_thread(void *arg)
{
	struct igt_thread_arg *thread = arg;
	struct intel_engine_cs *engine = thread->engine;
	struct rnd_state *prng = &thread->prng;
	struct drm_i915_gem_object *src, *dst;
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	unsigned int prio;
	IGT_TIMEOUT(end);
	u64 total, max;
	int err;

	ctx = thread->ctx;
	if (!ctx) {
		ctx = live_context_for_engine(engine, thread->file);
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);

		prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
		ctx->sched.priority = prio;
	}

	ce = i915_gem_context_get_engine(ctx, 0);
	GEM_BUG_ON(IS_ERR(ce));

	/*
	 * If we have a tiny shared address space, like for the GGTT,
	 * then we can't be too greedy.
	 */
	max = ce->vm->total;
	if (i915_is_ggtt(ce->vm) || thread->ctx)
		max = div_u64(max, thread->n_cpus);
	max >>= 4;

	total = PAGE_SIZE;
	do {
		/* Aim to keep the runtime under reasonable bounds! */
		const u32 max_phys_size = SZ_64K;
		u32 val = prandom_u32_state(prng);
		u32 phys_sz;
		u32 sz;
		u32 *vaddr;
		u32 i;

		total = min(total, max);
		sz = i915_prandom_u32_max_state(total, prng) + 1;
		phys_sz = sz % max_phys_size + 1;

		sz = round_up(sz, PAGE_SIZE);
		phys_sz = round_up(phys_sz, PAGE_SIZE);
		phys_sz = min(phys_sz, sz);

		pr_debug("%s with phys_sz=%x, sz=%x, val=%x\n", __func__,
			 phys_sz, sz, val);

		src = huge_gem_object(engine->i915, phys_sz, sz);
		if (IS_ERR(src)) {
			err = PTR_ERR(src);
			goto err_flush;
		}

		vaddr = i915_gem_object_pin_map_unlocked(src, I915_MAP_WB);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto err_put_src;
		}

		memset32(vaddr, val,
			 huge_gem_object_phys_size(src) / sizeof(u32));

		i915_gem_object_unpin_map(src);

		if (!(src->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
			src->cache_dirty = true;

		dst = huge_gem_object(engine->i915, phys_sz, sz);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto err_put_src;
		}

		vaddr = i915_gem_object_pin_map_unlocked(dst, I915_MAP_WB);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto err_put_dst;
		}

		memset32(vaddr, val ^ 0xdeadbeaf,
			 huge_gem_object_phys_size(dst) / sizeof(u32));

		if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
			dst->cache_dirty = true;

		err = i915_gem_object_copy_blt(src, dst, ce);
		if (err)
			goto err_unpin;

		err = i915_gem_object_wait(dst, 0, MAX_SCHEDULE_TIMEOUT);
		if (err)
			goto err_unpin;

		for (i = 0; i < huge_gem_object_phys_size(dst) / sizeof(u32); i += 17) {
			if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
				drm_clflush_virt_range(&vaddr[i], sizeof(vaddr[i]));

			if (vaddr[i] != val) {
				pr_err("vaddr[%u]=%x, expected=%x\n", i,
				       vaddr[i], val);
				err = -EINVAL;
				goto err_unpin;
			}
		}

		i915_gem_object_unpin_map(dst);

		i915_gem_object_put(src);
		i915_gem_object_put(dst);

		total <<= 1;
	} while (!time_after(jiffies, end));

	goto err_flush;

err_unpin:
	i915_gem_object_unpin_map(dst);
err_put_dst:
	i915_gem_object_put(dst);
err_put_src:
	i915_gem_object_put(src);
err_flush:
	if (err == -ENOMEM)
		err = 0;

	intel_context_put(ce);
	return err;
}
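
/*
 * Spawn one worker per online CPU, plus one to oversubscribe, and run
 * them concurrently against @engine. With SINGLE_CTX every worker
 * shares thread[0]'s context; otherwise each worker creates its own
 * context with a randomised priority.
 */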
static int igt_threaded_blt(struct intel_engine_cs *engine,
			    int (*blt_fn)(void *arg),
			    unsigned int flags)
#define SINGLE_CTX BIT(0)
{
	struct igt_thread_arg *thread;
	struct task_struct **tsk;
	unsigned int n_cpus, i;
	I915_RND_STATE(prng);
	int err = 0;

	n_cpus = num_online_cpus() + 1;

	tsk = kcalloc(n_cpus, sizeof(struct task_struct *), GFP_KERNEL);
	if (!tsk)
		return 0;

	thread = kcalloc(n_cpus, sizeof(struct igt_thread_arg), GFP_KERNEL);
	if (!thread)
		goto out_tsk;

	thread[0].file = mock_file(engine->i915);
	if (IS_ERR(thread[0].file)) {
		err = PTR_ERR(thread[0].file);
		goto out_thread;
	}

	if (flags & SINGLE_CTX) {
		thread[0].ctx = live_context_for_engine(engine, thread[0].file);
		if (IS_ERR(thread[0].ctx)) {
			err = PTR_ERR(thread[0].ctx);
			goto out_file;
		}
	}

	for (i = 0; i < n_cpus; ++i) {
		thread[i].engine = engine;
		thread[i].file = thread[0].file;
		thread[i].ctx = thread[0].ctx;
		thread[i].n_cpus = n_cpus;
		thread[i].prng =
			I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));

		tsk[i] = kthread_run(blt_fn, &thread[i], "igt/blt-%d", i);
		if (IS_ERR(tsk[i])) {
			err = PTR_ERR(tsk[i]);
			break;
		}

		get_task_struct(tsk[i]);
	}

	yield(); /* start all threads before we kthread_stop() */

	for (i = 0; i < n_cpus; ++i) {
		int status;

		if (IS_ERR_OR_NULL(tsk[i]))
			continue;

		status = kthread_stop(tsk[i]);
		if (status && !err)
			err = status;

		put_task_struct(tsk[i]);
	}

out_file:
	fput(thread[0].file);
out_thread:
	kfree(thread);
out_tsk:
	kfree(tsk);
	return err;
}
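
/* Run @fn on every copy-class engine exposed to userspace. */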
static int test_copy_engines(struct drm_i915_private *i915,
			     int (*fn)(void *arg),
			     unsigned int flags)
{
	struct intel_engine_cs *engine;
	int ret;

	for_each_uabi_class_engine(engine, I915_ENGINE_CLASS_COPY, i915) {
		ret = igt_threaded_blt(engine, fn, flags);
		if (ret)
			return ret;
	}

	return 0;
}
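
/*
 * Subtest entry points; the *_ctx0 variants force all workers to share
 * a single context via SINGLE_CTX.
 */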
static int igt_fill_blt(void *arg)
{
	return test_copy_engines(arg, igt_fill_blt_thread, 0);
}

static int igt_fill_blt_ctx0(void *arg)
{
	return test_copy_engines(arg, igt_fill_blt_thread, SINGLE_CTX);
}

static int igt_copy_blt(void *arg)
{
	return test_copy_engines(arg, igt_copy_blt_thread, 0);
}

static int igt_copy_blt_ctx0(void *arg)
{
	return test_copy_engines(arg, igt_copy_blt_thread, SINGLE_CTX);
}
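
/* Entry points for the selftest framework; skip if the GT is wedged. */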
int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_fill_blt),
		SUBTEST(igt_fill_blt_ctx0),
		SUBTEST(igt_copy_blt),
		SUBTEST(igt_copy_blt_ctx0),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return i915_live_subtests(tests, i915);
}

int i915_gem_object_blt_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_fill_blt),
		SUBTEST(perf_copy_blt),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return i915_live_subtests(tests, i915);
}