sha256_ni_asm.S 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356
  1. /*
  2. * Intel SHA Extensions optimized implementation of a SHA-256 update function
  3. *
  4. * This file is provided under a dual BSD/GPLv2 license. When using or
  5. * redistributing this file, you may do so under either license.
  6. *
  7. * GPL LICENSE SUMMARY
  8. *
  9. * Copyright(c) 2015 Intel Corporation.
  10. *
  11. * This program is free software; you can redistribute it and/or modify
  12. * it under the terms of version 2 of the GNU General Public License as
  13. * published by the Free Software Foundation.
  14. *
  15. * This program is distributed in the hope that it will be useful, but
  16. * WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  18. * General Public License for more details.
  19. *
  20. * Contact Information:
  21. * Sean Gulley <[email protected]>
  22. * Tim Chen <[email protected]>
  23. *
  24. * BSD LICENSE
  25. *
  26. * Copyright(c) 2015 Intel Corporation.
  27. *
  28. * Redistribution and use in source and binary forms, with or without
  29. * modification, are permitted provided that the following conditions
  30. * are met:
  31. *
  32. * * Redistributions of source code must retain the above copyright
  33. * notice, this list of conditions and the following disclaimer.
  34. * * Redistributions in binary form must reproduce the above copyright
  35. * notice, this list of conditions and the following disclaimer in
  36. * the documentation and/or other materials provided with the
  37. * distribution.
  38. * * Neither the name of Intel Corporation nor the names of its
  39. * contributors may be used to endorse or promote products derived
  40. * from this software without specific prior written permission.
  41. *
  42. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  43. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  44. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  45. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  46. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  47. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  48. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  49. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  50. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  51. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  52. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  53. *
  54. */
  55. #include <linux/linkage.h>
  56. #include <linux/cfi_types.h>
  57. #define DIGEST_PTR %rdi /* 1st arg: pointer to the hash state, read on entry and written on exit */
  58. #define DATA_PTR %rsi /* 2nd arg: pointer to the input data; advanced by 64 after each block */
  59. #define NUM_BLKS %rdx /* 3rd arg: block count on entry; converted into the end-of-data pointer */
  60. #define SHA256CONSTANTS %rax /* base address of the K256 round-constant table */
  61. #define MSG %xmm0 /* message words plus round constants; %xmm0 is the implicit operand of sha256rnds2 */
  62. #define STATE0 %xmm1 /* hash state words ABEF at the start and end of every 4-round group */
  63. #define STATE1 %xmm2 /* hash state words CDGH at the start and end of every 4-round group */
  64. #define MSGTMP0 %xmm3 /* message schedule window, register 0 of 4 */
  65. #define MSGTMP1 %xmm4 /* message schedule window, register 1 of 4 */
  66. #define MSGTMP2 %xmm5 /* message schedule window, register 2 of 4 */
  67. #define MSGTMP3 %xmm6 /* message schedule window, register 3 of 4 */
  68. #define MSGTMP4 %xmm7 /* scratch: palignr results and state reordering */
  69. #define SHUF_MASK %xmm8 /* pshufb control loaded from PSHUFFLE_BYTE_FLIP_MASK */
  70. #define ABEF_SAVE %xmm9 /* copy of STATE0 taken at the top of each block */
  71. #define CDGH_SAVE %xmm10 /* copy of STATE1 taken at the top of each block */
  72. /*
  73. * sha256_ni_transform - SHA-256 block function using the Intel SHA Extensions
  74. *
  75. * Takes a pointer to the current hash values, a pointer to the input data, and
  76. * a count of 64-byte blocks. After all blocks have been processed the updated
  77. * hash value is stored back through the digest pointer. Only complete blocks
  78. * are processed: padding, partial-block buffering and hash initialization must
  79. * be done by the caller. A block count of zero returns without touching digest.
  80. * Modifies %rax, %rdx, %rsi, %xmm0-%xmm10 and the flags.
  81. *
  82. * The indented lines in the loop are instructions related to rounds processing.
  83. * The non-indented lines are instructions related to the message schedule.
  84. *
  85. * void sha256_ni_transform(uint32_t *digest, const void *data,
  86. *                          uint32_t numBlocks);
  87. * digest : pointer to digest (two 16-byte halves, accessed with unaligned moves)
  88. * data: pointer to input data (64 bytes per block, read with unaligned loads)
  89. * numBlocks: Number of blocks to process. NOTE(review): all 64 bits of %rdx are used; assumes bits 63:32 are zero - confirm
  90. */
  91. .text
  92. .align 32 /* alignment request for the function entry */
  93. SYM_TYPED_FUNC_START(sha256_ni_transform) /* entry-point macro; not defined in this file */
  94. shl $6, NUM_BLKS /* convert block count to bytes (64 per block); ZF is set when the result is zero */
  95. jz .Ldone_hash /* no data: return without reading or writing the digest */
  96. add DATA_PTR, NUM_BLKS /* NUM_BLKS now holds the pointer to the end of the data */
  97. /*
  98. * load initial hash values
  99. * Need to reorder these appropriately
  100. * DCBA, HGFE -> ABEF, CDGH (dwords listed from most to least significant)
  101. */
  102. movdqu 0*16(DIGEST_PTR), STATE0 /* first 16 bytes of the digest: DCBA */
  103. movdqu 1*16(DIGEST_PTR), STATE1 /* second 16 bytes of the digest: HGFE */
  104. pshufd $0xB1, STATE0, STATE0 /* CDAB */
  105. pshufd $0x1B, STATE1, STATE1 /* EFGH */
  106. movdqa STATE0, MSGTMP4 /* keep CDAB; the palignr below overwrites STATE0 */
  107. palignr $8, STATE1, STATE0 /* ABEF */
  108. pblendw $0xF0, MSGTMP4, STATE1 /* CDGH */
  109. movdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK /* aligned load of the byte-swap control, kept for the whole call */
  110. lea K256(%rip), SHA256CONSTANTS /* RIP-relative address of the round constants */
  111. .Lloop0: /* one iteration per 64-byte block */
  112. /* Save hash values for addition after rounds */
  113. movdqa STATE0, ABEF_SAVE
  114. movdqa STATE1, CDGH_SAVE
  115. /* Rounds 0-3: message words come directly from the input block */
  116. movdqu 0*16(DATA_PTR), MSG /* bytes 0-15 of the block (unaligned load) */
  117. pshufb SHUF_MASK, MSG /* reverse the byte order inside each 32-bit word */
  118. movdqa MSG, MSGTMP0 /* keep the four words for the message schedule */
  119. paddd 0*16(SHA256CONSTANTS), MSG /* add the four round constants for this group */
  120. sha256rnds2 STATE0, STATE1 /* two rounds, fed by the low two dwords of MSG (%xmm0) */
  121. pshufd $0x0E, MSG, MSG /* move the high two dwords of MSG into the low half */
  122. sha256rnds2 STATE1, STATE0 /* two more rounds with source and destination swapped */
  123. /* Rounds 4-7: input bytes 16-31; schedule update of MSGTMP0 started */
  124. movdqu 1*16(DATA_PTR), MSG
  125. pshufb SHUF_MASK, MSG
  126. movdqa MSG, MSGTMP1
  127. paddd 1*16(SHA256CONSTANTS), MSG
  128. sha256rnds2 STATE0, STATE1
  129. pshufd $0x0E, MSG, MSG
  130. sha256rnds2 STATE1, STATE0
  131. sha256msg1 MSGTMP1, MSGTMP0 /* intermediate schedule step: updates MSGTMP0 using MSGTMP1 */
  132. /* Rounds 8-11: input bytes 32-47; schedule update of MSGTMP1 started */
  133. movdqu 2*16(DATA_PTR), MSG
  134. pshufb SHUF_MASK, MSG
  135. movdqa MSG, MSGTMP2
  136. paddd 2*16(SHA256CONSTANTS), MSG
  137. sha256rnds2 STATE0, STATE1
  138. pshufd $0x0E, MSG, MSG
  139. sha256rnds2 STATE1, STATE0
  140. sha256msg1 MSGTMP2, MSGTMP1
  141. /* Rounds 12-15: input bytes 48-63; MSGTMP0 finished for rounds 16-19; MSGTMP2 started */
  142. movdqu 3*16(DATA_PTR), MSG
  143. pshufb SHUF_MASK, MSG
  144. movdqa MSG, MSGTMP3
  145. paddd 3*16(SHA256CONSTANTS), MSG
  146. sha256rnds2 STATE0, STATE1
  147. movdqa MSGTMP3, MSGTMP4 /* scratch copy; palignr overwrites its destination */
  148. palignr $4, MSGTMP2, MSGTMP4 /* MSGTMP4 = upper three dwords of MSGTMP2, then the low dword of MSGTMP3 */
  149. paddd MSGTMP4, MSGTMP0 /* add those words into the partially updated MSGTMP0 */
  150. sha256msg2 MSGTMP3, MSGTMP0 /* final schedule step: MSGTMP0 now holds the next four message words */
  151. pshufd $0x0E, MSG, MSG
  152. sha256rnds2 STATE1, STATE0
  153. sha256msg1 MSGTMP3, MSGTMP2
  154. /* Rounds 16-19: rounds use MSGTMP0; MSGTMP1 finished; MSGTMP3 started */
  155. movdqa MSGTMP0, MSG /* from here on the message words come from the schedule, not the input */
  156. paddd 4*16(SHA256CONSTANTS), MSG
  157. sha256rnds2 STATE0, STATE1
  158. movdqa MSGTMP0, MSGTMP4
  159. palignr $4, MSGTMP3, MSGTMP4
  160. paddd MSGTMP4, MSGTMP1
  161. sha256msg2 MSGTMP0, MSGTMP1
  162. pshufd $0x0E, MSG, MSG
  163. sha256rnds2 STATE1, STATE0
  164. sha256msg1 MSGTMP0, MSGTMP3
  165. /* Rounds 20-23: rounds use MSGTMP1; MSGTMP2 finished; MSGTMP0 started */
  166. movdqa MSGTMP1, MSG
  167. paddd 5*16(SHA256CONSTANTS), MSG
  168. sha256rnds2 STATE0, STATE1
  169. movdqa MSGTMP1, MSGTMP4
  170. palignr $4, MSGTMP0, MSGTMP4
  171. paddd MSGTMP4, MSGTMP2
  172. sha256msg2 MSGTMP1, MSGTMP2
  173. pshufd $0x0E, MSG, MSG
  174. sha256rnds2 STATE1, STATE0
  175. sha256msg1 MSGTMP1, MSGTMP0
  176. /* Rounds 24-27: rounds use MSGTMP2; MSGTMP3 finished; MSGTMP1 started */
  177. movdqa MSGTMP2, MSG
  178. paddd 6*16(SHA256CONSTANTS), MSG
  179. sha256rnds2 STATE0, STATE1
  180. movdqa MSGTMP2, MSGTMP4
  181. palignr $4, MSGTMP1, MSGTMP4
  182. paddd MSGTMP4, MSGTMP3
  183. sha256msg2 MSGTMP2, MSGTMP3
  184. pshufd $0x0E, MSG, MSG
  185. sha256rnds2 STATE1, STATE0
  186. sha256msg1 MSGTMP2, MSGTMP1
  187. /* Rounds 28-31: rounds use MSGTMP3; MSGTMP0 finished; MSGTMP2 started */
  188. movdqa MSGTMP3, MSG
  189. paddd 7*16(SHA256CONSTANTS), MSG
  190. sha256rnds2 STATE0, STATE1
  191. movdqa MSGTMP3, MSGTMP4
  192. palignr $4, MSGTMP2, MSGTMP4
  193. paddd MSGTMP4, MSGTMP0
  194. sha256msg2 MSGTMP3, MSGTMP0
  195. pshufd $0x0E, MSG, MSG
  196. sha256rnds2 STATE1, STATE0
  197. sha256msg1 MSGTMP3, MSGTMP2
  198. /* Rounds 32-35: rounds use MSGTMP0; MSGTMP1 finished; MSGTMP3 started */
  199. movdqa MSGTMP0, MSG
  200. paddd 8*16(SHA256CONSTANTS), MSG
  201. sha256rnds2 STATE0, STATE1
  202. movdqa MSGTMP0, MSGTMP4
  203. palignr $4, MSGTMP3, MSGTMP4
  204. paddd MSGTMP4, MSGTMP1
  205. sha256msg2 MSGTMP0, MSGTMP1
  206. pshufd $0x0E, MSG, MSG
  207. sha256rnds2 STATE1, STATE0
  208. sha256msg1 MSGTMP0, MSGTMP3
  209. /* Rounds 36-39: rounds use MSGTMP1; MSGTMP2 finished; MSGTMP0 started */
  210. movdqa MSGTMP1, MSG
  211. paddd 9*16(SHA256CONSTANTS), MSG
  212. sha256rnds2 STATE0, STATE1
  213. movdqa MSGTMP1, MSGTMP4
  214. palignr $4, MSGTMP0, MSGTMP4
  215. paddd MSGTMP4, MSGTMP2
  216. sha256msg2 MSGTMP1, MSGTMP2
  217. pshufd $0x0E, MSG, MSG
  218. sha256rnds2 STATE1, STATE0
  219. sha256msg1 MSGTMP1, MSGTMP0
  220. /* Rounds 40-43: rounds use MSGTMP2; MSGTMP3 finished; MSGTMP1 started */
  221. movdqa MSGTMP2, MSG
  222. paddd 10*16(SHA256CONSTANTS), MSG
  223. sha256rnds2 STATE0, STATE1
  224. movdqa MSGTMP2, MSGTMP4
  225. palignr $4, MSGTMP1, MSGTMP4
  226. paddd MSGTMP4, MSGTMP3
  227. sha256msg2 MSGTMP2, MSGTMP3
  228. pshufd $0x0E, MSG, MSG
  229. sha256rnds2 STATE1, STATE0
  230. sha256msg1 MSGTMP2, MSGTMP1
  231. /* Rounds 44-47: rounds use MSGTMP3; MSGTMP0 finished; MSGTMP2 started */
  232. movdqa MSGTMP3, MSG
  233. paddd 11*16(SHA256CONSTANTS), MSG
  234. sha256rnds2 STATE0, STATE1
  235. movdqa MSGTMP3, MSGTMP4
  236. palignr $4, MSGTMP2, MSGTMP4
  237. paddd MSGTMP4, MSGTMP0
  238. sha256msg2 MSGTMP3, MSGTMP0
  239. pshufd $0x0E, MSG, MSG
  240. sha256rnds2 STATE1, STATE0
  241. sha256msg1 MSGTMP3, MSGTMP2
  242. /* Rounds 48-51: rounds use MSGTMP0; MSGTMP1 finished; MSGTMP3 started (last sha256msg1) */
  243. movdqa MSGTMP0, MSG
  244. paddd 12*16(SHA256CONSTANTS), MSG
  245. sha256rnds2 STATE0, STATE1
  246. movdqa MSGTMP0, MSGTMP4
  247. palignr $4, MSGTMP3, MSGTMP4
  248. paddd MSGTMP4, MSGTMP1
  249. sha256msg2 MSGTMP0, MSGTMP1
  250. pshufd $0x0E, MSG, MSG
  251. sha256rnds2 STATE1, STATE0
  252. sha256msg1 MSGTMP0, MSGTMP3
  253. /* Rounds 52-55: rounds use MSGTMP1; MSGTMP2 finished; no further schedule words are started */
  254. movdqa MSGTMP1, MSG
  255. paddd 13*16(SHA256CONSTANTS), MSG
  256. sha256rnds2 STATE0, STATE1
  257. movdqa MSGTMP1, MSGTMP4
  258. palignr $4, MSGTMP0, MSGTMP4
  259. paddd MSGTMP4, MSGTMP2
  260. sha256msg2 MSGTMP1, MSGTMP2
  261. pshufd $0x0E, MSG, MSG
  262. sha256rnds2 STATE1, STATE0
  263. /* Rounds 56-59: rounds use MSGTMP2; MSGTMP3 finished for the last group */
  264. movdqa MSGTMP2, MSG
  265. paddd 14*16(SHA256CONSTANTS), MSG
  266. sha256rnds2 STATE0, STATE1
  267. movdqa MSGTMP2, MSGTMP4
  268. palignr $4, MSGTMP1, MSGTMP4
  269. paddd MSGTMP4, MSGTMP3
  270. sha256msg2 MSGTMP2, MSGTMP3
  271. pshufd $0x0E, MSG, MSG
  272. sha256rnds2 STATE1, STATE0
  273. /* Rounds 60-63: rounds use MSGTMP3; no message schedule work remains */
  274. movdqa MSGTMP3, MSG
  275. paddd 15*16(SHA256CONSTANTS), MSG
  276. sha256rnds2 STATE0, STATE1
  277. pshufd $0x0E, MSG, MSG
  278. sha256rnds2 STATE1, STATE0
  279. /* Add current hash values with previously saved */
  280. paddd ABEF_SAVE, STATE0
  281. paddd CDGH_SAVE, STATE1
  282. /* Increment data pointer and loop if more to process */
  283. add $64, DATA_PTR
  284. cmp NUM_BLKS, DATA_PTR /* NUM_BLKS holds the end-of-data pointer computed at entry */
  285. jne .Lloop0
  286. /* Write hash values back in the correct order: ABEF, CDGH -> DCBA, HGFE */
  287. pshufd $0x1B, STATE0, STATE0 /* FEBA */
  288. pshufd $0xB1, STATE1, STATE1 /* DCHG */
  289. movdqa STATE0, MSGTMP4 /* keep FEBA; the pblendw below overwrites STATE0 */
  290. pblendw $0xF0, STATE1, STATE0 /* DCBA */
  291. palignr $8, MSGTMP4, STATE1 /* HGFE */
  292. movdqu STATE0, 0*16(DIGEST_PTR) /* store the first 16 bytes of the digest */
  293. movdqu STATE1, 1*16(DIGEST_PTR) /* store the second 16 bytes of the digest */
  294. .Ldone_hash: /* also reached directly when the block count is zero */
  295. RET /* return macro; not defined in this file */
  296. SYM_FUNC_END(sha256_ni_transform)
  297. .section .rodata.cst256.K256, "aM", @progbits, 256 /* allocatable, mergeable data section with a 256-byte entity size */
  298. .align 64
  299. K256: /* 64 32-bit round constants; sha256_ni_transform adds row k (offset k*16) in rounds 4k..4k+3 */
  300. .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 /* rounds 0-3 */
  301. .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 /* rounds 4-7 */
  302. .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 /* rounds 8-11 */
  303. .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 /* rounds 12-15 */
  304. .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc /* rounds 16-19 */
  305. .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da /* rounds 20-23 */
  306. .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 /* rounds 24-27 */
  307. .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 /* rounds 28-31 */
  308. .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 /* rounds 32-35 */
  309. .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 /* rounds 36-39 */
  310. .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 /* rounds 40-43 */
  311. .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 /* rounds 44-47 */
  312. .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 /* rounds 48-51 */
  313. .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 /* rounds 52-55 */
  314. .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 /* rounds 56-59 */
  315. .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 /* rounds 60-63 */
  316. .section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 /* allocatable, mergeable data section with a 16-byte entity size */
  317. .align 16 /* 16-byte alignment; the function loads this constant with movdqa */
  318. PSHUFFLE_BYTE_FLIP_MASK: /* pshufb control applied to each 16 bytes of input in rounds 0-15 */
  319. .octa 0x0c0d0e0f08090a0b0405060700010203 /* selects bytes 3,2,1,0 / 7,6,5,4 / 11,10,9,8 / 15,14,13,12: byte reversal within each dword */