/* linux/drivers/gpu/drm/i915/i915_memcpy.c */
   1/*
   2 * Copyright © 2016 Intel Corporation
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice (including the next
  12 * paragraph) shall be included in all copies or substantial portions of the
  13 * Software.
  14 *
  15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21 * IN THE SOFTWARE.
  22 *
  23 */
  24
  25#include <linux/kernel.h>
  26#include <asm/fpu/api.h>
  27
  28#include "i915_memcpy.h"
  29
/*
 * CI_BUG_ON(): a BUG_ON() that is only armed on CONFIG_DRM_I915_DEBUG
 * builds. On production builds the expression is merely type-checked
 * and never evaluated at runtime (BUILD_BUG_ON_INVALID), so debug
 * assertions cost nothing when the option is off.
 */
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
#define CI_BUG_ON(expr) BUG_ON(expr)
#else
#define CI_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr)
#endif

/*
 * Static-branch key flipped once in i915_memcpy_init_early() when the
 * CPU supports SSE4.1 movntdqa and we are not running under a
 * hypervisor; defaults to false (fall back to the caller's slow path).
 */
static DEFINE_STATIC_KEY_FALSE(has_movntdqa);
  37
/*
 * Streaming copy using non-temporal loads (movntdqa) and *aligned*
 * stores (movaps): both @src and @dst must be 16-byte aligned.
 *
 * NOTE: @len counts 16-byte chunks, NOT bytes — see
 * i915_memcpy_from_wc(), which passes len >> 4.
 *
 * The asm clobbers xmm0-xmm3, hence the kernel_fpu_begin()/end()
 * bracket to save and restore the FPU/SIMD state around it.
 */
static void __memcpy_ntdqa(void *dst, const void *src, unsigned long len)
{
	kernel_fpu_begin();

	/* Bulk loop: move four 16-byte chunks (64 bytes) per iteration. */
	while (len >= 4) {
		asm("movntdqa   (%0), %%xmm0\n"
		    "movntdqa 16(%0), %%xmm1\n"
		    "movntdqa 32(%0), %%xmm2\n"
		    "movntdqa 48(%0), %%xmm3\n"
		    "movaps %%xmm0,   (%1)\n"
		    "movaps %%xmm1, 16(%1)\n"
		    "movaps %%xmm2, 32(%1)\n"
		    "movaps %%xmm3, 48(%1)\n"
		    :: "r" (src), "r" (dst) : "memory");
		src += 64;
		dst += 64;
		len -= 4;
	}
	/* Tail: remaining chunks, one 16-byte move at a time. */
	while (len--) {
		asm("movntdqa (%0), %%xmm0\n"
		    "movaps %%xmm0, (%1)\n"
		    :: "r" (src), "r" (dst) : "memory");
		src += 16;
		dst += 16;
	}

	kernel_fpu_end();
}
  66
/*
 * Variant of __memcpy_ntdqa() for an *unaligned destination*: the
 * non-temporal loads (movntdqa) still require @src to be 16-byte
 * aligned, but the stores use movups so @dst may have any alignment.
 *
 * NOTE: @len counts 16-byte chunks, NOT bytes — see
 * i915_unaligned_memcpy_from_wc(), which passes DIV_ROUND_UP(len, 16),
 * so the final chunk may read/write up to 15 bytes past the caller's
 * nominal length (callers must guarantee that over-read is valid).
 */
static void __memcpy_ntdqu(void *dst, const void *src, unsigned long len)
{
	kernel_fpu_begin();

	/* Bulk loop: move four 16-byte chunks (64 bytes) per iteration. */
	while (len >= 4) {
		asm("movntdqa   (%0), %%xmm0\n"
		    "movntdqa 16(%0), %%xmm1\n"
		    "movntdqa 32(%0), %%xmm2\n"
		    "movntdqa 48(%0), %%xmm3\n"
		    "movups %%xmm0,   (%1)\n"
		    "movups %%xmm1, 16(%1)\n"
		    "movups %%xmm2, 32(%1)\n"
		    "movups %%xmm3, 48(%1)\n"
		    :: "r" (src), "r" (dst) : "memory");
		src += 64;
		dst += 64;
		len -= 4;
	}
	/* Tail: remaining chunks, one 16-byte move at a time. */
	while (len--) {
		asm("movntdqa (%0), %%xmm0\n"
		    "movups %%xmm0, (%1)\n"
		    :: "r" (src), "r" (dst) : "memory");
		src += 16;
		dst += 16;
	}

	kernel_fpu_end();
}
  95
  96/**
  97 * i915_memcpy_from_wc: perform an accelerated *aligned* read from WC
  98 * @dst: destination pointer
  99 * @src: source pointer
 100 * @len: how many bytes to copy
 101 *
 102 * i915_memcpy_from_wc copies @len bytes from @src to @dst using
 103 * non-temporal instructions where available. Note that all arguments
 104 * (@src, @dst) must be aligned to 16 bytes and @len must be a multiple
 105 * of 16.
 106 *
 107 * To test whether accelerated reads from WC are supported, use
 108 * i915_memcpy_from_wc(NULL, NULL, 0);
 109 *
 110 * Returns true if the copy was successful, false if the preconditions
 111 * are not met.
 112 */
 113bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len)
 114{
 115        if (unlikely(((unsigned long)dst | (unsigned long)src | len) & 15))
 116                return false;
 117
 118        if (static_branch_likely(&has_movntdqa)) {
 119                if (likely(len))
 120                        __memcpy_ntdqa(dst, src, len >> 4);
 121                return true;
 122        }
 123
 124        return false;
 125}
 126
 127/**
 128 * i915_unaligned_memcpy_from_wc: perform a mostly accelerated read from WC
 129 * @dst: destination pointer
 130 * @src: source pointer
 131 * @len: how many bytes to copy
 132 *
 133 * Like i915_memcpy_from_wc(), the unaligned variant copies @len bytes from
 134 * @src to @dst using * non-temporal instructions where available, but
 135 * accepts that its arguments may not be aligned, but are valid for the
 136 * potential 16-byte read past the end.
 137 */
 138void i915_unaligned_memcpy_from_wc(void *dst, const void *src, unsigned long len)
 139{
 140        unsigned long addr;
 141
 142        CI_BUG_ON(!i915_has_memcpy_from_wc());
 143
 144        addr = (unsigned long)src;
 145        if (!IS_ALIGNED(addr, 16)) {
 146                unsigned long x = min(ALIGN(addr, 16) - addr, len);
 147
 148                memcpy(dst, src, x);
 149
 150                len -= x;
 151                dst += x;
 152                src += x;
 153        }
 154
 155        if (likely(len))
 156                __memcpy_ntdqu(dst, src, DIV_ROUND_UP(len, 16));
 157}
 158
 159void i915_memcpy_init_early(struct drm_i915_private *dev_priv)
 160{
 161        /*
 162         * Some hypervisors (e.g. KVM) don't support VEX-prefix instructions
 163         * emulation. So don't enable movntdqa in hypervisor guest.
 164         */
 165        if (static_cpu_has(X86_FEATURE_XMM4_1) &&
 166            !boot_cpu_has(X86_FEATURE_HYPERVISOR))
 167                static_branch_enable(&has_movntdqa);
 168}
 169
/* lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995. */