/* sha1_ni_asm.S */
  1. /*
  2. * Intel SHA Extensions optimized implementation of a SHA-1 update function
  3. *
  4. * This file is provided under a dual BSD/GPLv2 license. When using or
  5. * redistributing this file, you may do so under either license.
  6. *
  7. * GPL LICENSE SUMMARY
  8. *
  9. * Copyright(c) 2015 Intel Corporation.
  10. *
  11. * This program is free software; you can redistribute it and/or modify
  12. * it under the terms of version 2 of the GNU General Public License as
  13. * published by the Free Software Foundation.
  14. *
  15. * This program is distributed in the hope that it will be useful, but
  16. * WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  18. * General Public License for more details.
  19. *
  20. * Contact Information:
  21. * Sean Gulley <[email protected]>
  22. * Tim Chen <[email protected]>
  23. *
  24. * BSD LICENSE
  25. *
  26. * Copyright(c) 2015 Intel Corporation.
  27. *
  28. * Redistribution and use in source and binary forms, with or without
  29. * modification, are permitted provided that the following conditions
  30. * are met:
  31. *
  32. * * Redistributions of source code must retain the above copyright
  33. * notice, this list of conditions and the following disclaimer.
  34. * * Redistributions in binary form must reproduce the above copyright
  35. * notice, this list of conditions and the following disclaimer in
  36. * the documentation and/or other materials provided with the
  37. * distribution.
  38. * * Neither the name of Intel Corporation nor the names of its
  39. * contributors may be used to endorse or promote products derived
  40. * from this software without specific prior written permission.
  41. *
  42. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  43. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  44. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  45. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  46. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  47. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  48. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  49. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  50. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  51. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  52. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  53. *
  54. */
  55. #include <linux/linkage.h>
  56. #include <linux/cfi_types.h>
  57. #define DIGEST_PTR %rdi /* 1st arg */
  58. #define DATA_PTR %rsi /* 2nd arg */
  59. #define NUM_BLKS %rdx /* 3rd arg */
  60. /* gcc conversion */
  61. #define FRAME_SIZE 32 /* space for 2x16 bytes */
  62. #define ABCD %xmm0
  63. #define E0 %xmm1 /* Need two E's b/c they ping pong */
  64. #define E1 %xmm2
  65. #define MSG0 %xmm3
  66. #define MSG1 %xmm4
  67. #define MSG2 %xmm5
  68. #define MSG3 %xmm6
  69. #define SHUF_MASK %xmm7
  70. /*
  71. * Intel SHA Extensions optimized implementation of a SHA-1 update function
  72. *
  73. * The function takes a pointer to the current hash values, a pointer to the
  74. * input data, and a number of 64 byte blocks to process. Once all blocks have
  75. * been processed, the digest pointer is updated with the resulting hash value.
  76. * The function only processes complete blocks, there is no functionality to
  77. * store partial blocks. All message padding and hash value initialization must
  78. * be done outside the update function.
  79. *
  80. * The indented lines in the loop are instructions related to rounds processing.
  81. * The non-indented lines are instructions related to the message schedule.
  82. *
  83. * void sha1_ni_transform(uint32_t *digest, const void *data,
  84. uint32_t numBlocks)
  85. * digest : pointer to digest
  86. * data: pointer to input data
  87. * numBlocks: Number of blocks to process
  88. */
  89. .text
  90. .align 32
  91. SYM_TYPED_FUNC_START(sha1_ni_transform)
  92. push %rbp
  93. mov %rsp, %rbp
  94. sub $FRAME_SIZE, %rsp
  95. and $~0xF, %rsp
  96. shl $6, NUM_BLKS /* convert to bytes */
  97. jz .Ldone_hash
  98. add DATA_PTR, NUM_BLKS /* pointer to end of data */
  99. /* load initial hash values */
  100. pinsrd $3, 1*16(DIGEST_PTR), E0
  101. movdqu 0*16(DIGEST_PTR), ABCD
  102. pand UPPER_WORD_MASK(%rip), E0
  103. pshufd $0x1B, ABCD, ABCD
  104. movdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
  105. .Lloop0:
  106. /* Save hash values for addition after rounds */
  107. movdqa E0, (0*16)(%rsp)
  108. movdqa ABCD, (1*16)(%rsp)
  109. /* Rounds 0-3 */
  110. movdqu 0*16(DATA_PTR), MSG0
  111. pshufb SHUF_MASK, MSG0
  112. paddd MSG0, E0
  113. movdqa ABCD, E1
  114. sha1rnds4 $0, E0, ABCD
  115. /* Rounds 4-7 */
  116. movdqu 1*16(DATA_PTR), MSG1
  117. pshufb SHUF_MASK, MSG1
  118. sha1nexte MSG1, E1
  119. movdqa ABCD, E0
  120. sha1rnds4 $0, E1, ABCD
  121. sha1msg1 MSG1, MSG0
  122. /* Rounds 8-11 */
  123. movdqu 2*16(DATA_PTR), MSG2
  124. pshufb SHUF_MASK, MSG2
  125. sha1nexte MSG2, E0
  126. movdqa ABCD, E1
  127. sha1rnds4 $0, E0, ABCD
  128. sha1msg1 MSG2, MSG1
  129. pxor MSG2, MSG0
  130. /* Rounds 12-15 */
  131. movdqu 3*16(DATA_PTR), MSG3
  132. pshufb SHUF_MASK, MSG3
  133. sha1nexte MSG3, E1
  134. movdqa ABCD, E0
  135. sha1msg2 MSG3, MSG0
  136. sha1rnds4 $0, E1, ABCD
  137. sha1msg1 MSG3, MSG2
  138. pxor MSG3, MSG1
  139. /* Rounds 16-19 */
  140. sha1nexte MSG0, E0
  141. movdqa ABCD, E1
  142. sha1msg2 MSG0, MSG1
  143. sha1rnds4 $0, E0, ABCD
  144. sha1msg1 MSG0, MSG3
  145. pxor MSG0, MSG2
  146. /* Rounds 20-23 */
  147. sha1nexte MSG1, E1
  148. movdqa ABCD, E0
  149. sha1msg2 MSG1, MSG2
  150. sha1rnds4 $1, E1, ABCD
  151. sha1msg1 MSG1, MSG0
  152. pxor MSG1, MSG3
  153. /* Rounds 24-27 */
  154. sha1nexte MSG2, E0
  155. movdqa ABCD, E1
  156. sha1msg2 MSG2, MSG3
  157. sha1rnds4 $1, E0, ABCD
  158. sha1msg1 MSG2, MSG1
  159. pxor MSG2, MSG0
  160. /* Rounds 28-31 */
  161. sha1nexte MSG3, E1
  162. movdqa ABCD, E0
  163. sha1msg2 MSG3, MSG0
  164. sha1rnds4 $1, E1, ABCD
  165. sha1msg1 MSG3, MSG2
  166. pxor MSG3, MSG1
  167. /* Rounds 32-35 */
  168. sha1nexte MSG0, E0
  169. movdqa ABCD, E1
  170. sha1msg2 MSG0, MSG1
  171. sha1rnds4 $1, E0, ABCD
  172. sha1msg1 MSG0, MSG3
  173. pxor MSG0, MSG2
  174. /* Rounds 36-39 */
  175. sha1nexte MSG1, E1
  176. movdqa ABCD, E0
  177. sha1msg2 MSG1, MSG2
  178. sha1rnds4 $1, E1, ABCD
  179. sha1msg1 MSG1, MSG0
  180. pxor MSG1, MSG3
  181. /* Rounds 40-43 */
  182. sha1nexte MSG2, E0
  183. movdqa ABCD, E1
  184. sha1msg2 MSG2, MSG3
  185. sha1rnds4 $2, E0, ABCD
  186. sha1msg1 MSG2, MSG1
  187. pxor MSG2, MSG0
  188. /* Rounds 44-47 */
  189. sha1nexte MSG3, E1
  190. movdqa ABCD, E0
  191. sha1msg2 MSG3, MSG0
  192. sha1rnds4 $2, E1, ABCD
  193. sha1msg1 MSG3, MSG2
  194. pxor MSG3, MSG1
  195. /* Rounds 48-51 */
  196. sha1nexte MSG0, E0
  197. movdqa ABCD, E1
  198. sha1msg2 MSG0, MSG1
  199. sha1rnds4 $2, E0, ABCD
  200. sha1msg1 MSG0, MSG3
  201. pxor MSG0, MSG2
  202. /* Rounds 52-55 */
  203. sha1nexte MSG1, E1
  204. movdqa ABCD, E0
  205. sha1msg2 MSG1, MSG2
  206. sha1rnds4 $2, E1, ABCD
  207. sha1msg1 MSG1, MSG0
  208. pxor MSG1, MSG3
  209. /* Rounds 56-59 */
  210. sha1nexte MSG2, E0
  211. movdqa ABCD, E1
  212. sha1msg2 MSG2, MSG3
  213. sha1rnds4 $2, E0, ABCD
  214. sha1msg1 MSG2, MSG1
  215. pxor MSG2, MSG0
  216. /* Rounds 60-63 */
  217. sha1nexte MSG3, E1
  218. movdqa ABCD, E0
  219. sha1msg2 MSG3, MSG0
  220. sha1rnds4 $3, E1, ABCD
  221. sha1msg1 MSG3, MSG2
  222. pxor MSG3, MSG1
  223. /* Rounds 64-67 */
  224. sha1nexte MSG0, E0
  225. movdqa ABCD, E1
  226. sha1msg2 MSG0, MSG1
  227. sha1rnds4 $3, E0, ABCD
  228. sha1msg1 MSG0, MSG3
  229. pxor MSG0, MSG2
  230. /* Rounds 68-71 */
  231. sha1nexte MSG1, E1
  232. movdqa ABCD, E0
  233. sha1msg2 MSG1, MSG2
  234. sha1rnds4 $3, E1, ABCD
  235. pxor MSG1, MSG3
  236. /* Rounds 72-75 */
  237. sha1nexte MSG2, E0
  238. movdqa ABCD, E1
  239. sha1msg2 MSG2, MSG3
  240. sha1rnds4 $3, E0, ABCD
  241. /* Rounds 76-79 */
  242. sha1nexte MSG3, E1
  243. movdqa ABCD, E0
  244. sha1rnds4 $3, E1, ABCD
  245. /* Add current hash values with previously saved */
  246. sha1nexte (0*16)(%rsp), E0
  247. paddd (1*16)(%rsp), ABCD
  248. /* Increment data pointer and loop if more to process */
  249. add $64, DATA_PTR
  250. cmp NUM_BLKS, DATA_PTR
  251. jne .Lloop0
  252. /* Write hash values back in the correct order */
  253. pshufd $0x1B, ABCD, ABCD
  254. movdqu ABCD, 0*16(DIGEST_PTR)
  255. pextrd $3, E0, 1*16(DIGEST_PTR)
  256. .Ldone_hash:
  257. mov %rbp, %rsp
  258. pop %rbp
  259. RET
  260. SYM_FUNC_END(sha1_ni_transform)
  261. .section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
  262. .align 16
  263. PSHUFFLE_BYTE_FLIP_MASK:
  264. .octa 0x000102030405060708090a0b0c0d0e0f
  265. .section .rodata.cst16.UPPER_WORD_MASK, "aM", @progbits, 16
  266. .align 16
  267. UPPER_WORD_MASK:
  268. .octa 0xFFFFFFFF000000000000000000000000