i915_memcpy.c 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168
  1. /*
  2. * Copyright © 2016 Intel Corporation
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice (including the next
  12. * paragraph) shall be included in all copies or substantial portions of the
  13. * Software.
  14. *
  15. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  18. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21. * IN THE SOFTWARE.
  22. *
  23. */
  24. #include <linux/kernel.h>
  25. #include <asm/fpu/api.h>
  26. #include "i915_memcpy.h"
  27. #if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
  28. #define CI_BUG_ON(expr) BUG_ON(expr)
  29. #else
  30. #define CI_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr)
  31. #endif
  32. static DEFINE_STATIC_KEY_FALSE(has_movntdqa);
  33. static void __memcpy_ntdqa(void *dst, const void *src, unsigned long len)
  34. {
  35. kernel_fpu_begin();
  36. while (len >= 4) {
  37. asm("movntdqa (%0), %%xmm0\n"
  38. "movntdqa 16(%0), %%xmm1\n"
  39. "movntdqa 32(%0), %%xmm2\n"
  40. "movntdqa 48(%0), %%xmm3\n"
  41. "movaps %%xmm0, (%1)\n"
  42. "movaps %%xmm1, 16(%1)\n"
  43. "movaps %%xmm2, 32(%1)\n"
  44. "movaps %%xmm3, 48(%1)\n"
  45. :: "r" (src), "r" (dst) : "memory");
  46. src += 64;
  47. dst += 64;
  48. len -= 4;
  49. }
  50. while (len--) {
  51. asm("movntdqa (%0), %%xmm0\n"
  52. "movaps %%xmm0, (%1)\n"
  53. :: "r" (src), "r" (dst) : "memory");
  54. src += 16;
  55. dst += 16;
  56. }
  57. kernel_fpu_end();
  58. }
  59. static void __memcpy_ntdqu(void *dst, const void *src, unsigned long len)
  60. {
  61. kernel_fpu_begin();
  62. while (len >= 4) {
  63. asm("movntdqa (%0), %%xmm0\n"
  64. "movntdqa 16(%0), %%xmm1\n"
  65. "movntdqa 32(%0), %%xmm2\n"
  66. "movntdqa 48(%0), %%xmm3\n"
  67. "movups %%xmm0, (%1)\n"
  68. "movups %%xmm1, 16(%1)\n"
  69. "movups %%xmm2, 32(%1)\n"
  70. "movups %%xmm3, 48(%1)\n"
  71. :: "r" (src), "r" (dst) : "memory");
  72. src += 64;
  73. dst += 64;
  74. len -= 4;
  75. }
  76. while (len--) {
  77. asm("movntdqa (%0), %%xmm0\n"
  78. "movups %%xmm0, (%1)\n"
  79. :: "r" (src), "r" (dst) : "memory");
  80. src += 16;
  81. dst += 16;
  82. }
  83. kernel_fpu_end();
  84. }
  85. /**
  86. * i915_memcpy_from_wc: perform an accelerated *aligned* read from WC
  87. * @dst: destination pointer
  88. * @src: source pointer
  89. * @len: how many bytes to copy
  90. *
  91. * i915_memcpy_from_wc copies @len bytes from @src to @dst using
  92. * non-temporal instructions where available. Note that all arguments
  93. * (@src, @dst) must be aligned to 16 bytes and @len must be a multiple
  94. * of 16.
  95. *
  96. * To test whether accelerated reads from WC are supported, use
  97. * i915_memcpy_from_wc(NULL, NULL, 0);
  98. *
  99. * Returns true if the copy was successful, false if the preconditions
  100. * are not met.
  101. */
  102. bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len)
  103. {
  104. if (unlikely(((unsigned long)dst | (unsigned long)src | len) & 15))
  105. return false;
  106. if (static_branch_likely(&has_movntdqa)) {
  107. if (likely(len))
  108. __memcpy_ntdqa(dst, src, len >> 4);
  109. return true;
  110. }
  111. return false;
  112. }
  /**
   * i915_unaligned_memcpy_from_wc: perform a mostly accelerated read from WC
   * @dst: destination pointer
   * @src: source pointer
   * @len: how many bytes to copy
   *
   * Like i915_memcpy_from_wc(), this copies @len bytes from @src to @dst
   * using non-temporal instructions where available, but it does not
   * require its arguments to be aligned. Any bytes before the first
   * 16-byte boundary of @src are copied with a plain memcpy(); the rest
   * is copied in whole 16-byte chunks.
   *
   * The chunk count is rounded up, so when the remaining length is not a
   * multiple of 16 the final chunk reads up to 15 bytes past the end of
   * the source range and stores the same number of bytes past the end of
   * the destination range. Both buffers must be valid for that overrun.
   */
  void i915_unaligned_memcpy_from_wc(void *dst, const void *src, unsigned long len)
  {
      unsigned long misalign;

      CI_BUG_ON(!i915_has_memcpy_from_wc());

      /* Offset of @src within its 16-byte line; zero when already aligned. */
      misalign = (unsigned long)src & 15;
      if (misalign) {
          /* Bytes up to the next 16-byte boundary, capped at @len. */
          unsigned long head = min(16 - misalign, len);

          memcpy(dst, src, head);
          dst += head;
          src += head;
          len -= head;
      }

      /* @src is now 16-byte aligned; @dst may still be unaligned. */
      if (likely(len))
          __memcpy_ntdqu(dst, src, DIV_ROUND_UP(len, 16));
  }
  139. void i915_memcpy_init_early(struct drm_i915_private *dev_priv)
  140. {
  141. /*
  142. * Some hypervisors (e.g. KVM) don't support VEX-prefix instructions
  143. * emulation. So don't enable movntdqa in hypervisor guest.
  144. */
  145. if (static_cpu_has(X86_FEATURE_XMM4_1) &&
  146. !boot_cpu_has(X86_FEATURE_HYPERVISOR))
  147. static_branch_enable(&has_movntdqa);
  148. }