clear_page_64.S

/* SPDX-License-Identifier: GPL-2.0-only */
#include <linux/linkage.h>
#include <asm/asm.h>
#include <asm/export.h>

/*
 * Most CPUs support enhanced REP MOVSB/STOSB instructions. It is
 * recommended to use these when possible, and we do use them by default.
 * If enhanced REP MOVSB/STOSB is not available, try the fast-string
 * (REP_GOOD) variant. Otherwise, use the original, unrolled version.
 */
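
/*
 * Nothing in this file selects between the variants below; callers are
 * expected to pick one based on CPU features: clear_page_erms when
 * X86_FEATURE_ERMS is set, clear_page_rep when X86_FEATURE_REP_GOOD is
 * set, and clear_page_orig otherwise. In the kernel, that selection is
 * typically patched in at the C call site via the alternatives mechanism.
 */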

/*
 * Zero a page.
 * %rdi - page
 */
SYM_FUNC_START(clear_page_rep)
        movl $4096/8,%ecx
        xorl %eax,%eax
        rep stosq
        RET
SYM_FUNC_END(clear_page_rep)
EXPORT_SYMBOL_GPL(clear_page_rep)
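
/*
 * For reference: clear_page_rep loads %ecx with 4096/8 = 512, zeroes
 * %rax, and lets REP STOSQ perform 512 eight-byte stores of %rax,
 * advancing %rdi by 8 each time, i.e. exactly one 4K page. A minimal
 * usage sketch (hypothetical, not part of this file):
 *
 *      leaq    some_page(%rip), %rdi   # hypothetical 4K-aligned buffer
 *      call    clear_page_rep          # clobbers %rax, %rcx, %rdi
 */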

SYM_FUNC_START(clear_page_orig)
        xorl %eax,%eax
        movl $4096/64,%ecx
        .p2align 4
.Lloop:
        decl %ecx
#define PUT(x) movq %rax,x*8(%rdi)
        movq %rax,(%rdi)
        PUT(1)
        PUT(2)
        PUT(3)
        PUT(4)
        PUT(5)
        PUT(6)
        PUT(7)
        leaq 64(%rdi),%rdi
        jnz .Lloop
        nop
        RET
SYM_FUNC_END(clear_page_orig)
EXPORT_SYMBOL_GPL(clear_page_orig)
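
/*
 * clear_page_orig above is the fallback for CPUs without fast string
 * operations: the loop is unrolled to clear one 64-byte cache line per
 * iteration (the leading movq plus PUT(1)..PUT(7) make eight qword
 * stores), so %ecx counts 4096/64 = 64 iterations. The early decl %ecx
 * sets the flags that jnz tests later; the intervening movq/leaq do not
 * touch them.
 */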

SYM_FUNC_START(clear_page_erms)
        movl $4096,%ecx
        xorl %eax,%eax
        rep stosb
        RET
SYM_FUNC_END(clear_page_erms)
EXPORT_SYMBOL_GPL(clear_page_erms)
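
/*
 * clear_page_erms relies on the Enhanced REP MOVSB/STOSB (ERMS) feature:
 * with ERMS, REP STOSB with a plain byte count (%ecx = 4096) is handled
 * efficiently by microcode, so no splitting into qword stores is needed.
 */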

/*
 * Default clear user-space.
 * Input:
 * rdi destination
 * rcx count
 *
 * Output:
 * rcx: uncleared bytes or 0 if successful.
 */
SYM_FUNC_START(clear_user_original)
        /*
         * Copy only the lower 32 bits of the size: that is enough to
         * handle the remaining bytes, i.e., no need for a 'q' suffix and
         * thus a REX prefix.
         */
        mov %ecx,%eax
        shr $3,%rcx
        jz .Lrest_bytes

        # do the qwords first
        .p2align 4
.Lqwords:
        movq $0,(%rdi)
        lea 8(%rdi),%rdi
        dec %rcx
        jnz .Lqwords

.Lrest_bytes:
        and $7, %eax
        jz .Lexit

        # now do the rest of the bytes
.Lbytes:
        movb $0,(%rdi)
        inc %rdi
        dec %eax
        jnz .Lbytes

.Lexit:
        /*
         * %rax still needs to be cleared in the exception case because
         * this function is called from inline asm and the compiler
         * expects %rax to be zero when exiting the inline asm, in case
         * it might get reused somewhere.
         */
        xor %eax,%eax
        RET
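
        /*
         * Exception fixups: the _ASM_EXTABLE_UA entries below route a
         * fault on the store at .Lqwords to .Lqwords_exception and a
         * fault at .Lbytes to .Lbytes_exception. In the qword case, the
         * remaining qword count in %rcx is scaled back to bytes (shl $3)
         * and the not-yet-started tail bytes from %eax are added, so the
         * caller sees the number of uncleared bytes in %rcx, as
         * documented above.
         */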
.Lqwords_exception:
        # convert remaining qwords back into bytes to return to caller
        shl $3, %rcx
        and $7, %eax
        add %rax,%rcx
        jmp .Lexit

.Lbytes_exception:
        mov %eax,%ecx
        jmp .Lexit

        _ASM_EXTABLE_UA(.Lqwords, .Lqwords_exception)
        _ASM_EXTABLE_UA(.Lbytes, .Lbytes_exception)
SYM_FUNC_END(clear_user_original)
EXPORT_SYMBOL(clear_user_original)
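
/*
 * Minimal usage sketch for the clear_user_* register convention
 * (hypothetical, not part of this file): %rdi = destination, %rcx = byte
 * count; on return, %rcx holds the number of bytes NOT cleared (0 on
 * success) and %rax is zero. Real kernel callers reach these functions
 * through inline asm that also handles SMAP (STAC/CLAC) around the
 * access.
 *
 *      movq    user_ptr(%rip), %rdi    # hypothetical user destination
 *      movq    $128, %rcx              # bytes to zero
 *      call    clear_user_original
 *      testq   %rcx, %rcx              # non-zero => partial fault
 *      jnz     handle_fault            # hypothetical fault path
 */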

/*
 * Alternative clear user-space when CPU feature X86_FEATURE_REP_GOOD is
 * present.
 * Input:
 * rdi destination
 * rcx count
 *
 * Output:
 * rcx: uncleared bytes or 0 if successful.
 */
SYM_FUNC_START(clear_user_rep_good)
        # tail-jump to the original version for less than a cacheline
        cmp $64, %rcx
        jb clear_user_original

.Lprep:
        # keep the lower 32 bits of the count for the trailing bytes
        mov %ecx, %edx
        shr $3, %rcx
        jz .Lrep_good_rest_bytes

.Lrep_good_qwords:
        rep stosq

.Lrep_good_rest_bytes:
        and $7, %edx
        jz .Lrep_good_exit
        mov %edx, %ecx

.Lrep_good_bytes:
        rep stosb

.Lrep_good_exit:
        # see .Lexit comment above
        xor %eax, %eax
        RET
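
        /*
         * Fixup note: when REP STOSQ faults, the CPU leaves the number of
         * remaining iterations in %rcx, so the handler below only has to
         * scale it back to bytes and add the trailing byte count kept in
         * %edx. A fault in REP STOSB needs no conversion at all, since
         * %rcx already counts bytes; its extable entry therefore points
         * straight at .Lrep_good_exit. Also note that neither this
         * function nor clear_user_erms zeroes %rax before the REP STOS,
         * so the call site is expected to provide %rax = 0 as the value
         * to store.
         */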
.Lrep_good_qwords_exception:
        # convert remaining qwords back into bytes to return to caller
        shl $3, %rcx
        and $7, %edx
        add %rdx, %rcx
        jmp .Lrep_good_exit

        _ASM_EXTABLE_UA(.Lrep_good_qwords, .Lrep_good_qwords_exception)
        _ASM_EXTABLE_UA(.Lrep_good_bytes, .Lrep_good_exit)
SYM_FUNC_END(clear_user_rep_good)
EXPORT_SYMBOL(clear_user_rep_good)

/*
 * Alternative clear user-space when CPU feature X86_FEATURE_ERMS is
 * present.
 * Input:
 * rdi destination
 * rcx count
 *
 * Output:
 * rcx: uncleared bytes or 0 if successful.
 */
SYM_FUNC_START(clear_user_erms)
        # tail-jump to the original version for less than a cacheline
        cmp $64, %rcx
        jb clear_user_original

.Lerms_bytes:
        rep stosb

.Lerms_exit:
        xorl %eax,%eax
        RET

        _ASM_EXTABLE_UA(.Lerms_bytes, .Lerms_exit)
SYM_FUNC_END(clear_user_erms)
EXPORT_SYMBOL(clear_user_erms)
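
/*
 * clear_user_erms needs no separate fixup path: if REP STOSB faults,
 * %rcx already holds the number of bytes left to clear, which is exactly
 * the documented return value, so the extable entry jumps straight to
 * .Lerms_exit to zero %rax and return.
 */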