/**************************************************************************
 *
 * Copyright (c) 2006-2007 Tungsten Graphics, Inc., Cedar Park, TX., USA
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
/*
 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
 */

#include <linux/cc_platform.h>
#include <linux/export.h>
#include <linux/highmem.h>
#include <linux/ioport.h>
#include <linux/iosys-map.h>
#include <xen/xen.h>

#include <drm/drm_cache.h>

/* A small bounce buffer that fits on the stack. */
#define MEMCPY_BOUNCE_SIZE 128

#if defined(CONFIG_X86)
#include <asm/smp.h>

/*
 * clflushopt is an unordered instruction which needs fencing with mfence or
 * sfence to avoid ordering issues. For drm_clflush_page this fencing happens
 * in the caller.
 */
static void
drm_clflush_page(struct page *page)
{
	uint8_t *page_virtual;
	unsigned int i;
	const int size = boot_cpu_data.x86_clflush_size;

	if (unlikely(page == NULL))
		return;

	page_virtual = kmap_atomic(page);
	for (i = 0; i < PAGE_SIZE; i += size)
		clflushopt(page_virtual + i);
	kunmap_atomic(page_virtual);
}

static void drm_cache_flush_clflush(struct page *pages[],
				    unsigned long num_pages)
{
	unsigned long i;

	mb(); /* Full memory barrier used before so that CLFLUSH is ordered. */
	for (i = 0; i < num_pages; i++)
		drm_clflush_page(*pages++);
	mb(); /* Also used after CLFLUSH so that all cache is flushed. */
}
#endif

/**
 * drm_clflush_pages - Flush dcache lines of a set of pages.
 * @pages: List of pages to be flushed.
 * @num_pages: Number of pages in the array.
 *
 * Flush every data cache line entry that points to an address belonging
 * to a page in the array.
 */
void
drm_clflush_pages(struct page *pages[], unsigned long num_pages)
{
#if defined(CONFIG_X86)
	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
		drm_cache_flush_clflush(pages, num_pages);
		return;
	}

	if (wbinvd_on_all_cpus())
		pr_err("Timed out waiting for cache flush\n");

#elif defined(__powerpc__)
	unsigned long i;

	for (i = 0; i < num_pages; i++) {
		struct page *page = pages[i];
		void *page_virtual;

		if (unlikely(page == NULL))
			continue;

		page_virtual = kmap_atomic(page);
		flush_dcache_range((unsigned long)page_virtual,
				   (unsigned long)page_virtual + PAGE_SIZE);
		kunmap_atomic(page_virtual);
	}
#else
	WARN_ONCE(1, "Architecture has no drm_cache.c support\n");
#endif
}
EXPORT_SYMBOL(drm_clflush_pages);
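
/*
 * Illustrative usage sketch (not part of the original file): a driver that
 * has written to an array of backing pages with the CPU and is about to hand
 * them to a device that does not snoop the CPU caches could flush them like
 * this before programming DMA. The "example_bo", "bo->pages" and
 * "bo->num_pages" names are assumptions made for the example only.
 */
#if 0
static void example_flush_bo_pages(struct example_bo *bo)
{
	/* Write back every cache line covering the backing pages. */
	drm_clflush_pages(bo->pages, bo->num_pages);
}
#endif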

/**
 * drm_clflush_sg - Flush dcache lines pointing to a scatter-gather list.
 * @st: struct sg_table.
 *
 * Flush every data cache line entry that points to an address in the
 * sg.
 */
void
drm_clflush_sg(struct sg_table *st)
{
#if defined(CONFIG_X86)
	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
		struct sg_page_iter sg_iter;

		mb(); /* CLFLUSH is ordered only by using memory barriers. */
		for_each_sgtable_page(st, &sg_iter, 0)
			drm_clflush_page(sg_page_iter_page(&sg_iter));
		mb(); /* Make sure that every cache line entry is flushed. */

		return;
	}

	if (wbinvd_on_all_cpus())
		pr_err("Timed out waiting for cache flush\n");
#else
	WARN_ONCE(1, "Architecture has no drm_cache.c support\n");
#endif
}
EXPORT_SYMBOL(drm_clflush_sg);
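
/*
 * Illustrative usage sketch (not part of the original file): after touching a
 * buffer object's backing storage with the CPU, a non-coherent driver can
 * flush everything referenced by its sg_table in a single call. The
 * "example_obj" and "obj->sgt" names are assumptions made for the example
 * only.
 */
#if 0
static void example_flush_obj_sgt(struct example_obj *obj)
{
	/* Flush every page referenced by the scatter-gather table. */
	drm_clflush_sg(obj->sgt);
}
#endif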

/**
 * drm_clflush_virt_range - Flush dcache lines of a region
 * @addr: Initial kernel memory address.
 * @length: Region size.
 *
 * Flush every data cache line entry that points to an address in the
 * region requested.
 */
void
drm_clflush_virt_range(void *addr, unsigned long length)
{
#if defined(CONFIG_X86)
	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
		const int size = boot_cpu_data.x86_clflush_size;
		void *end = addr + length;

		addr = (void *)(((unsigned long)addr) & -size);
		mb(); /* CLFLUSH is only ordered with a full memory barrier. */
		for (; addr < end; addr += size)
			clflushopt(addr);
		clflushopt(end - 1); /* force serialisation */
		mb(); /* Ensure that every data cache line entry is flushed. */

		return;
	}

	if (wbinvd_on_all_cpus())
		pr_err("Timed out waiting for cache flush\n");
#else
	WARN_ONCE(1, "Architecture has no drm_cache.c support\n");
#endif
}
EXPORT_SYMBOL(drm_clflush_virt_range);
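
/*
 * Illustrative usage sketch (not part of the original file): flush a single
 * CPU-written structure, such as a command descriptor the device will read
 * via DMA, before kicking off the hardware. The "desc" pointer and size are
 * assumptions made for the example only.
 */
#if 0
static void example_publish_descriptor(void *desc, size_t desc_size)
{
	/* Push the descriptor's cache lines out to memory for the device. */
	drm_clflush_virt_range(desc, desc_size);
}
#endif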

bool drm_need_swiotlb(int dma_bits)
{
	struct resource *tmp;
	resource_size_t max_iomem = 0;

	/*
	 * Xen paravirtual hosts require swiotlb regardless of requested dma
	 * transfer size.
	 *
	 * NOTE: Really, what it requires is use of the dma_alloc_coherent
	 * allocator used in ttm_dma_populate() instead of
	 * ttm_populate_and_map_pages(), which bounce buffers so much in
	 * Xen it leads to swiotlb buffer exhaustion.
	 */
	if (xen_pv_domain())
		return true;

	/*
	 * Enforce dma_alloc_coherent when memory encryption is active as well
	 * for the same reasons as for Xen paravirtual hosts.
	 */
	if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
		return true;

	for (tmp = iomem_resource.child; tmp; tmp = tmp->sibling)
		max_iomem = max(max_iomem, tmp->end);

	return max_iomem > ((u64)1 << dma_bits);
}
EXPORT_SYMBOL(drm_need_swiotlb);
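
/*
 * Illustrative usage sketch (not part of the original file): a TTM-based
 * driver typically records this once at init time, based on the DMA mask it
 * managed to set, and uses the result when deciding how its pages must be
 * allocated and mapped. The "example_device" structure, its "need_swiotlb"
 * field and the 40-bit DMA limit are assumptions made for the example only.
 */
#if 0
static void example_check_swiotlb(struct example_device *priv)
{
	/* Device addresses 40 bits; does any system I/O memory lie above that? */
	priv->need_swiotlb = drm_need_swiotlb(40);
}
#endif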

static void memcpy_fallback(struct iosys_map *dst,
			    const struct iosys_map *src,
			    unsigned long len)
{
	if (!dst->is_iomem && !src->is_iomem) {
		memcpy(dst->vaddr, src->vaddr, len);
	} else if (!src->is_iomem) {
		iosys_map_memcpy_to(dst, 0, src->vaddr, len);
	} else if (!dst->is_iomem) {
		memcpy_fromio(dst->vaddr, src->vaddr_iomem, len);
	} else {
		/*
		 * Bounce size is not performance tuned, but using a
		 * bounce buffer like this is significantly faster than
		 * resorting to ioreadxx() + iowritexx().
		 */
		char bounce[MEMCPY_BOUNCE_SIZE];
		void __iomem *_src = src->vaddr_iomem;
		void __iomem *_dst = dst->vaddr_iomem;

		while (len >= MEMCPY_BOUNCE_SIZE) {
			memcpy_fromio(bounce, _src, MEMCPY_BOUNCE_SIZE);
			memcpy_toio(_dst, bounce, MEMCPY_BOUNCE_SIZE);
			_src += MEMCPY_BOUNCE_SIZE;
			_dst += MEMCPY_BOUNCE_SIZE;
			len -= MEMCPY_BOUNCE_SIZE;
		}
		if (len) {
			/* Copy only the remaining tail, not a full bounce chunk. */
			memcpy_fromio(bounce, _src, len);
			memcpy_toio(_dst, bounce, len);
		}
	}
}

#ifdef CONFIG_X86

static DEFINE_STATIC_KEY_FALSE(has_movntdqa);

static void __memcpy_ntdqa(void *dst, const void *src, unsigned long len)
{
	kernel_fpu_begin();

	while (len >= 4) {
		asm("movntdqa (%0), %%xmm0\n"
		    "movntdqa 16(%0), %%xmm1\n"
		    "movntdqa 32(%0), %%xmm2\n"
		    "movntdqa 48(%0), %%xmm3\n"
		    "movaps %%xmm0, (%1)\n"
		    "movaps %%xmm1, 16(%1)\n"
		    "movaps %%xmm2, 32(%1)\n"
		    "movaps %%xmm3, 48(%1)\n"
		    :: "r" (src), "r" (dst) : "memory");
		src += 64;
		dst += 64;
		len -= 4;
	}
	while (len--) {
		asm("movntdqa (%0), %%xmm0\n"
		    "movaps %%xmm0, (%1)\n"
		    :: "r" (src), "r" (dst) : "memory");
		src += 16;
		dst += 16;
	}

	kernel_fpu_end();
}
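
/*
 * Note that __memcpy_ntdqa() above counts @len in 16-byte chunks rather than
 * bytes; __drm_memcpy_from_wc() below passes len >> 4. A 1 KiB copy, for
 * instance, arrives as 64 chunks and is consumed four chunks (64 bytes) per
 * iteration of the unrolled loop, with any remainder copied one chunk at a
 * time by the second loop.
 */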

/*
 * __drm_memcpy_from_wc copies @len bytes from @src to @dst using
 * non-temporal instructions where available. Note that all arguments
 * (@src, @dst) must be aligned to 16 bytes and @len must be a multiple
 * of 16.
 */
static void __drm_memcpy_from_wc(void *dst, const void *src, unsigned long len)
{
	if (unlikely(((unsigned long)dst | (unsigned long)src | len) & 15))
		memcpy(dst, src, len);
	else if (likely(len))
		__memcpy_ntdqa(dst, src, len >> 4);
}

/**
 * drm_memcpy_from_wc - Perform the fastest available memcpy from a source
 * that may be WC.
 * @dst: The destination pointer
 * @src: The source pointer
 * @len: The size of the area to transfer in bytes
 *
 * Tries an arch optimized memcpy for prefetching reading out of a WC region,
 * and if no such beast is available, falls back to a normal memcpy.
 */
void drm_memcpy_from_wc(struct iosys_map *dst,
			const struct iosys_map *src,
			unsigned long len)
{
	if (WARN_ON(in_interrupt())) {
		memcpy_fallback(dst, src, len);
		return;
	}

	if (static_branch_likely(&has_movntdqa)) {
		__drm_memcpy_from_wc(dst->is_iomem ?
				     (void __force *)dst->vaddr_iomem :
				     dst->vaddr,
				     src->is_iomem ?
				     (void const __force *)src->vaddr_iomem :
				     src->vaddr,
				     len);
		return;
	}

	memcpy_fallback(dst, src, len);
}
EXPORT_SYMBOL(drm_memcpy_from_wc);
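
/*
 * Illustrative usage sketch (not part of the original file): read a buffer
 * back from write-combined VRAM into a system-memory shadow copy. The
 * "vram_base" iomem pointer and sizes are assumptions made for the example
 * only; iosys_map_set_vaddr()/iosys_map_set_vaddr_iomem() come from
 * <linux/iosys-map.h>, already included above, and vmalloc() would also
 * need <linux/vmalloc.h>.
 */
#if 0
static int example_read_back_vram(void __iomem *vram_base, size_t size)
{
	struct iosys_map src, dst;
	void *shadow = vmalloc(size);

	if (!shadow)
		return -ENOMEM;

	iosys_map_set_vaddr_iomem(&src, vram_base); /* WC source in VRAM */
	iosys_map_set_vaddr(&dst, shadow);          /* destination in system RAM */

	drm_memcpy_from_wc(&dst, &src, size);

	vfree(shadow);
	return 0;
}
#endif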

/*
 * drm_memcpy_init_early - One time initialization of the WC memcpy code
 */
void drm_memcpy_init_early(void)
{
	/*
	 * Some hypervisors (e.g. KVM) don't support VEX-prefix instruction
	 * emulation. So don't enable movntdqa in a hypervisor guest.
	 */
	if (static_cpu_has(X86_FEATURE_XMM4_1) &&
	    !boot_cpu_has(X86_FEATURE_HYPERVISOR))
		static_branch_enable(&has_movntdqa);
}
#else
void drm_memcpy_from_wc(struct iosys_map *dst,
			const struct iosys_map *src,
			unsigned long len)
{
	WARN_ON(in_interrupt());
	memcpy_fallback(dst, src, len);
}
EXPORT_SYMBOL(drm_memcpy_from_wc);

void drm_memcpy_init_early(void)
{
}
#endif /* CONFIG_X86 */