aegis128-aesni-asm.S 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748
  1. /* SPDX-License-Identifier: GPL-2.0-only */
  2. /*
  3. * AES-NI + SSE2 implementation of AEGIS-128
  4. *
  5. * Copyright (c) 2017-2018 Ondrej Mosnacek <[email protected]>
  6. * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
  7. */
  8. #include <linux/linkage.h>
  9. #include <linux/cfi_types.h>
  10. #include <asm/frame.h>
  11. #define STATE0 %xmm0
  12. #define STATE1 %xmm1
  13. #define STATE2 %xmm2
  14. #define STATE3 %xmm3
  15. #define STATE4 %xmm4
  16. #define KEY %xmm5
  17. #define MSG %xmm5
  18. #define T0 %xmm6
  19. #define T1 %xmm7
  20. #define STATEP %rdi
  21. #define LEN %rsi
  22. #define SRC %rdx
  23. #define DST %rcx
  24. .section .rodata.cst16.aegis128_const, "aM", @progbits, 32
  25. .align 16
  26. .Laegis128_const_0:
  27. .byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
  28. .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
  29. .Laegis128_const_1:
  30. .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
  31. .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
  32. .section .rodata.cst16.aegis128_counter, "aM", @progbits, 16
  33. .align 16
  34. .Laegis128_counter:
  35. .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
  36. .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
  37. .text
  38. /*
  39. * aegis128_update
  40. * input:
  41. * STATE[0-4] - input state
  42. * output:
  43. * STATE[0-4] - output state (shifted positions)
  44. * changed:
  45. * T0
  46. */
  47. .macro aegis128_update
  48. movdqa STATE4, T0
  49. aesenc STATE0, STATE4
  50. aesenc STATE1, STATE0
  51. aesenc STATE2, STATE1
  52. aesenc STATE3, STATE2
  53. aesenc T0, STATE3
  54. .endm
  55. /*
  56. * __load_partial: internal ABI
  57. * input:
  58. * LEN - bytes
  59. * SRC - src
  60. * output:
  61. * MSG - message block
  62. * changed:
  63. * T0
  64. * %r8
  65. * %r9
  66. */
  67. SYM_FUNC_START_LOCAL(__load_partial)
  68. xor %r9d, %r9d
  69. pxor MSG, MSG
  70. mov LEN, %r8
  71. and $0x1, %r8
  72. jz .Lld_partial_1
  73. mov LEN, %r8
  74. and $0x1E, %r8
  75. add SRC, %r8
  76. mov (%r8), %r9b
  77. .Lld_partial_1:
  78. mov LEN, %r8
  79. and $0x2, %r8
  80. jz .Lld_partial_2
  81. mov LEN, %r8
  82. and $0x1C, %r8
  83. add SRC, %r8
  84. shl $0x10, %r9
  85. mov (%r8), %r9w
  86. .Lld_partial_2:
  87. mov LEN, %r8
  88. and $0x4, %r8
  89. jz .Lld_partial_4
  90. mov LEN, %r8
  91. and $0x18, %r8
  92. add SRC, %r8
  93. shl $32, %r9
  94. mov (%r8), %r8d
  95. xor %r8, %r9
  96. .Lld_partial_4:
  97. movq %r9, MSG
  98. mov LEN, %r8
  99. and $0x8, %r8
  100. jz .Lld_partial_8
  101. mov LEN, %r8
  102. and $0x10, %r8
  103. add SRC, %r8
  104. pslldq $8, MSG
  105. movq (%r8), T0
  106. pxor T0, MSG
  107. .Lld_partial_8:
  108. RET
  109. SYM_FUNC_END(__load_partial)
  110. /*
  111. * __store_partial: internal ABI
  112. * input:
  113. * LEN - bytes
  114. * DST - dst
  115. * output:
  116. * T0 - message block
  117. * changed:
  118. * %r8
  119. * %r9
  120. * %r10
  121. */
  122. SYM_FUNC_START_LOCAL(__store_partial)
  123. mov LEN, %r8
  124. mov DST, %r9
  125. movq T0, %r10
  126. cmp $8, %r8
  127. jl .Lst_partial_8
  128. mov %r10, (%r9)
  129. psrldq $8, T0
  130. movq T0, %r10
  131. sub $8, %r8
  132. add $8, %r9
  133. .Lst_partial_8:
  134. cmp $4, %r8
  135. jl .Lst_partial_4
  136. mov %r10d, (%r9)
  137. shr $32, %r10
  138. sub $4, %r8
  139. add $4, %r9
  140. .Lst_partial_4:
  141. cmp $2, %r8
  142. jl .Lst_partial_2
  143. mov %r10w, (%r9)
  144. shr $0x10, %r10
  145. sub $2, %r8
  146. add $2, %r9
  147. .Lst_partial_2:
  148. cmp $1, %r8
  149. jl .Lst_partial_1
  150. mov %r10b, (%r9)
  151. .Lst_partial_1:
  152. RET
  153. SYM_FUNC_END(__store_partial)
  154. /*
  155. * void crypto_aegis128_aesni_init(void *state, const void *key, const void *iv);
  156. */
  157. SYM_FUNC_START(crypto_aegis128_aesni_init)
  158. FRAME_BEGIN
  159. /* load IV: */
  160. movdqu (%rdx), T1
  161. /* load key: */
  162. movdqa (%rsi), KEY
  163. pxor KEY, T1
  164. movdqa T1, STATE0
  165. movdqa KEY, STATE3
  166. movdqa KEY, STATE4
  167. /* load the constants: */
  168. movdqa .Laegis128_const_0, STATE2
  169. movdqa .Laegis128_const_1, STATE1
  170. pxor STATE2, STATE3
  171. pxor STATE1, STATE4
  172. /* update 10 times with KEY / KEY xor IV: */
  173. aegis128_update; pxor KEY, STATE4
  174. aegis128_update; pxor T1, STATE3
  175. aegis128_update; pxor KEY, STATE2
  176. aegis128_update; pxor T1, STATE1
  177. aegis128_update; pxor KEY, STATE0
  178. aegis128_update; pxor T1, STATE4
  179. aegis128_update; pxor KEY, STATE3
  180. aegis128_update; pxor T1, STATE2
  181. aegis128_update; pxor KEY, STATE1
  182. aegis128_update; pxor T1, STATE0
  183. /* store the state: */
  184. movdqu STATE0, 0x00(STATEP)
  185. movdqu STATE1, 0x10(STATEP)
  186. movdqu STATE2, 0x20(STATEP)
  187. movdqu STATE3, 0x30(STATEP)
  188. movdqu STATE4, 0x40(STATEP)
  189. FRAME_END
  190. RET
  191. SYM_FUNC_END(crypto_aegis128_aesni_init)
  192. /*
  193. * void crypto_aegis128_aesni_ad(void *state, unsigned int length,
  194. * const void *data);
  195. */
  196. SYM_FUNC_START(crypto_aegis128_aesni_ad)
  197. FRAME_BEGIN
  198. cmp $0x10, LEN
  199. jb .Lad_out
  200. /* load the state: */
  201. movdqu 0x00(STATEP), STATE0
  202. movdqu 0x10(STATEP), STATE1
  203. movdqu 0x20(STATEP), STATE2
  204. movdqu 0x30(STATEP), STATE3
  205. movdqu 0x40(STATEP), STATE4
  206. mov SRC, %r8
  207. and $0xF, %r8
  208. jnz .Lad_u_loop
  209. .align 8
  210. .Lad_a_loop:
  211. movdqa 0x00(SRC), MSG
  212. aegis128_update
  213. pxor MSG, STATE4
  214. sub $0x10, LEN
  215. cmp $0x10, LEN
  216. jl .Lad_out_1
  217. movdqa 0x10(SRC), MSG
  218. aegis128_update
  219. pxor MSG, STATE3
  220. sub $0x10, LEN
  221. cmp $0x10, LEN
  222. jl .Lad_out_2
  223. movdqa 0x20(SRC), MSG
  224. aegis128_update
  225. pxor MSG, STATE2
  226. sub $0x10, LEN
  227. cmp $0x10, LEN
  228. jl .Lad_out_3
  229. movdqa 0x30(SRC), MSG
  230. aegis128_update
  231. pxor MSG, STATE1
  232. sub $0x10, LEN
  233. cmp $0x10, LEN
  234. jl .Lad_out_4
  235. movdqa 0x40(SRC), MSG
  236. aegis128_update
  237. pxor MSG, STATE0
  238. sub $0x10, LEN
  239. cmp $0x10, LEN
  240. jl .Lad_out_0
  241. add $0x50, SRC
  242. jmp .Lad_a_loop
  243. .align 8
  244. .Lad_u_loop:
  245. movdqu 0x00(SRC), MSG
  246. aegis128_update
  247. pxor MSG, STATE4
  248. sub $0x10, LEN
  249. cmp $0x10, LEN
  250. jl .Lad_out_1
  251. movdqu 0x10(SRC), MSG
  252. aegis128_update
  253. pxor MSG, STATE3
  254. sub $0x10, LEN
  255. cmp $0x10, LEN
  256. jl .Lad_out_2
  257. movdqu 0x20(SRC), MSG
  258. aegis128_update
  259. pxor MSG, STATE2
  260. sub $0x10, LEN
  261. cmp $0x10, LEN
  262. jl .Lad_out_3
  263. movdqu 0x30(SRC), MSG
  264. aegis128_update
  265. pxor MSG, STATE1
  266. sub $0x10, LEN
  267. cmp $0x10, LEN
  268. jl .Lad_out_4
  269. movdqu 0x40(SRC), MSG
  270. aegis128_update
  271. pxor MSG, STATE0
  272. sub $0x10, LEN
  273. cmp $0x10, LEN
  274. jl .Lad_out_0
  275. add $0x50, SRC
  276. jmp .Lad_u_loop
  277. /* store the state: */
  278. .Lad_out_0:
  279. movdqu STATE0, 0x00(STATEP)
  280. movdqu STATE1, 0x10(STATEP)
  281. movdqu STATE2, 0x20(STATEP)
  282. movdqu STATE3, 0x30(STATEP)
  283. movdqu STATE4, 0x40(STATEP)
  284. FRAME_END
  285. RET
  286. .Lad_out_1:
  287. movdqu STATE4, 0x00(STATEP)
  288. movdqu STATE0, 0x10(STATEP)
  289. movdqu STATE1, 0x20(STATEP)
  290. movdqu STATE2, 0x30(STATEP)
  291. movdqu STATE3, 0x40(STATEP)
  292. FRAME_END
  293. RET
  294. .Lad_out_2:
  295. movdqu STATE3, 0x00(STATEP)
  296. movdqu STATE4, 0x10(STATEP)
  297. movdqu STATE0, 0x20(STATEP)
  298. movdqu STATE1, 0x30(STATEP)
  299. movdqu STATE2, 0x40(STATEP)
  300. FRAME_END
  301. RET
  302. .Lad_out_3:
  303. movdqu STATE2, 0x00(STATEP)
  304. movdqu STATE3, 0x10(STATEP)
  305. movdqu STATE4, 0x20(STATEP)
  306. movdqu STATE0, 0x30(STATEP)
  307. movdqu STATE1, 0x40(STATEP)
  308. FRAME_END
  309. RET
  310. .Lad_out_4:
  311. movdqu STATE1, 0x00(STATEP)
  312. movdqu STATE2, 0x10(STATEP)
  313. movdqu STATE3, 0x20(STATEP)
  314. movdqu STATE4, 0x30(STATEP)
  315. movdqu STATE0, 0x40(STATEP)
  316. FRAME_END
  317. RET
  318. .Lad_out:
  319. FRAME_END
  320. RET
  321. SYM_FUNC_END(crypto_aegis128_aesni_ad)
  322. .macro encrypt_block a s0 s1 s2 s3 s4 i
  323. movdq\a (\i * 0x10)(SRC), MSG
  324. movdqa MSG, T0
  325. pxor \s1, T0
  326. pxor \s4, T0
  327. movdqa \s2, T1
  328. pand \s3, T1
  329. pxor T1, T0
  330. movdq\a T0, (\i * 0x10)(DST)
  331. aegis128_update
  332. pxor MSG, \s4
  333. sub $0x10, LEN
  334. cmp $0x10, LEN
  335. jl .Lenc_out_\i
  336. .endm
  337. /*
  338. * void crypto_aegis128_aesni_enc(void *state, unsigned int length,
  339. * const void *src, void *dst);
  340. */
  341. SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc)
  342. FRAME_BEGIN
  343. cmp $0x10, LEN
  344. jb .Lenc_out
  345. /* load the state: */
  346. movdqu 0x00(STATEP), STATE0
  347. movdqu 0x10(STATEP), STATE1
  348. movdqu 0x20(STATEP), STATE2
  349. movdqu 0x30(STATEP), STATE3
  350. movdqu 0x40(STATEP), STATE4
  351. mov SRC, %r8
  352. or DST, %r8
  353. and $0xF, %r8
  354. jnz .Lenc_u_loop
  355. .align 8
  356. .Lenc_a_loop:
  357. encrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
  358. encrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
  359. encrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
  360. encrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
  361. encrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4
  362. add $0x50, SRC
  363. add $0x50, DST
  364. jmp .Lenc_a_loop
  365. .align 8
  366. .Lenc_u_loop:
  367. encrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
  368. encrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
  369. encrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
  370. encrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
  371. encrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4
  372. add $0x50, SRC
  373. add $0x50, DST
  374. jmp .Lenc_u_loop
  375. /* store the state: */
  376. .Lenc_out_0:
  377. movdqu STATE4, 0x00(STATEP)
  378. movdqu STATE0, 0x10(STATEP)
  379. movdqu STATE1, 0x20(STATEP)
  380. movdqu STATE2, 0x30(STATEP)
  381. movdqu STATE3, 0x40(STATEP)
  382. FRAME_END
  383. RET
  384. .Lenc_out_1:
  385. movdqu STATE3, 0x00(STATEP)
  386. movdqu STATE4, 0x10(STATEP)
  387. movdqu STATE0, 0x20(STATEP)
  388. movdqu STATE1, 0x30(STATEP)
  389. movdqu STATE2, 0x40(STATEP)
  390. FRAME_END
  391. RET
  392. .Lenc_out_2:
  393. movdqu STATE2, 0x00(STATEP)
  394. movdqu STATE3, 0x10(STATEP)
  395. movdqu STATE4, 0x20(STATEP)
  396. movdqu STATE0, 0x30(STATEP)
  397. movdqu STATE1, 0x40(STATEP)
  398. FRAME_END
  399. RET
  400. .Lenc_out_3:
  401. movdqu STATE1, 0x00(STATEP)
  402. movdqu STATE2, 0x10(STATEP)
  403. movdqu STATE3, 0x20(STATEP)
  404. movdqu STATE4, 0x30(STATEP)
  405. movdqu STATE0, 0x40(STATEP)
  406. FRAME_END
  407. RET
  408. .Lenc_out_4:
  409. movdqu STATE0, 0x00(STATEP)
  410. movdqu STATE1, 0x10(STATEP)
  411. movdqu STATE2, 0x20(STATEP)
  412. movdqu STATE3, 0x30(STATEP)
  413. movdqu STATE4, 0x40(STATEP)
  414. FRAME_END
  415. RET
  416. .Lenc_out:
  417. FRAME_END
  418. RET
  419. SYM_FUNC_END(crypto_aegis128_aesni_enc)
  420. /*
  421. * void crypto_aegis128_aesni_enc_tail(void *state, unsigned int length,
  422. * const void *src, void *dst);
  423. */
  424. SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc_tail)
  425. FRAME_BEGIN
  426. /* load the state: */
  427. movdqu 0x00(STATEP), STATE0
  428. movdqu 0x10(STATEP), STATE1
  429. movdqu 0x20(STATEP), STATE2
  430. movdqu 0x30(STATEP), STATE3
  431. movdqu 0x40(STATEP), STATE4
  432. /* encrypt message: */
  433. call __load_partial
  434. movdqa MSG, T0
  435. pxor STATE1, T0
  436. pxor STATE4, T0
  437. movdqa STATE2, T1
  438. pand STATE3, T1
  439. pxor T1, T0
  440. call __store_partial
  441. aegis128_update
  442. pxor MSG, STATE4
  443. /* store the state: */
  444. movdqu STATE4, 0x00(STATEP)
  445. movdqu STATE0, 0x10(STATEP)
  446. movdqu STATE1, 0x20(STATEP)
  447. movdqu STATE2, 0x30(STATEP)
  448. movdqu STATE3, 0x40(STATEP)
  449. FRAME_END
  450. RET
  451. SYM_FUNC_END(crypto_aegis128_aesni_enc_tail)
  452. .macro decrypt_block a s0 s1 s2 s3 s4 i
  453. movdq\a (\i * 0x10)(SRC), MSG
  454. pxor \s1, MSG
  455. pxor \s4, MSG
  456. movdqa \s2, T1
  457. pand \s3, T1
  458. pxor T1, MSG
  459. movdq\a MSG, (\i * 0x10)(DST)
  460. aegis128_update
  461. pxor MSG, \s4
  462. sub $0x10, LEN
  463. cmp $0x10, LEN
  464. jl .Ldec_out_\i
  465. .endm
  466. /*
  467. * void crypto_aegis128_aesni_dec(void *state, unsigned int length,
  468. * const void *src, void *dst);
  469. */
  470. SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec)
  471. FRAME_BEGIN
  472. cmp $0x10, LEN
  473. jb .Ldec_out
  474. /* load the state: */
  475. movdqu 0x00(STATEP), STATE0
  476. movdqu 0x10(STATEP), STATE1
  477. movdqu 0x20(STATEP), STATE2
  478. movdqu 0x30(STATEP), STATE3
  479. movdqu 0x40(STATEP), STATE4
  480. mov SRC, %r8
  481. or DST, %r8
  482. and $0xF, %r8
  483. jnz .Ldec_u_loop
  484. .align 8
  485. .Ldec_a_loop:
  486. decrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
  487. decrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
  488. decrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
  489. decrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
  490. decrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4
  491. add $0x50, SRC
  492. add $0x50, DST
  493. jmp .Ldec_a_loop
  494. .align 8
  495. .Ldec_u_loop:
  496. decrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
  497. decrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
  498. decrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
  499. decrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
  500. decrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4
  501. add $0x50, SRC
  502. add $0x50, DST
  503. jmp .Ldec_u_loop
  504. /* store the state: */
  505. .Ldec_out_0:
  506. movdqu STATE4, 0x00(STATEP)
  507. movdqu STATE0, 0x10(STATEP)
  508. movdqu STATE1, 0x20(STATEP)
  509. movdqu STATE2, 0x30(STATEP)
  510. movdqu STATE3, 0x40(STATEP)
  511. FRAME_END
  512. RET
  513. .Ldec_out_1:
  514. movdqu STATE3, 0x00(STATEP)
  515. movdqu STATE4, 0x10(STATEP)
  516. movdqu STATE0, 0x20(STATEP)
  517. movdqu STATE1, 0x30(STATEP)
  518. movdqu STATE2, 0x40(STATEP)
  519. FRAME_END
  520. RET
  521. .Ldec_out_2:
  522. movdqu STATE2, 0x00(STATEP)
  523. movdqu STATE3, 0x10(STATEP)
  524. movdqu STATE4, 0x20(STATEP)
  525. movdqu STATE0, 0x30(STATEP)
  526. movdqu STATE1, 0x40(STATEP)
  527. FRAME_END
  528. RET
  529. .Ldec_out_3:
  530. movdqu STATE1, 0x00(STATEP)
  531. movdqu STATE2, 0x10(STATEP)
  532. movdqu STATE3, 0x20(STATEP)
  533. movdqu STATE4, 0x30(STATEP)
  534. movdqu STATE0, 0x40(STATEP)
  535. FRAME_END
  536. RET
  537. .Ldec_out_4:
  538. movdqu STATE0, 0x00(STATEP)
  539. movdqu STATE1, 0x10(STATEP)
  540. movdqu STATE2, 0x20(STATEP)
  541. movdqu STATE3, 0x30(STATEP)
  542. movdqu STATE4, 0x40(STATEP)
  543. FRAME_END
  544. RET
  545. .Ldec_out:
  546. FRAME_END
  547. RET
  548. SYM_FUNC_END(crypto_aegis128_aesni_dec)
  549. /*
  550. * void crypto_aegis128_aesni_dec_tail(void *state, unsigned int length,
  551. * const void *src, void *dst);
  552. */
  553. SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec_tail)
  554. FRAME_BEGIN
  555. /* load the state: */
  556. movdqu 0x00(STATEP), STATE0
  557. movdqu 0x10(STATEP), STATE1
  558. movdqu 0x20(STATEP), STATE2
  559. movdqu 0x30(STATEP), STATE3
  560. movdqu 0x40(STATEP), STATE4
  561. /* decrypt message: */
  562. call __load_partial
  563. pxor STATE1, MSG
  564. pxor STATE4, MSG
  565. movdqa STATE2, T1
  566. pand STATE3, T1
  567. pxor T1, MSG
  568. movdqa MSG, T0
  569. call __store_partial
  570. /* mask with byte count: */
  571. movq LEN, T0
  572. punpcklbw T0, T0
  573. punpcklbw T0, T0
  574. punpcklbw T0, T0
  575. punpcklbw T0, T0
  576. movdqa .Laegis128_counter, T1
  577. pcmpgtb T1, T0
  578. pand T0, MSG
  579. aegis128_update
  580. pxor MSG, STATE4
  581. /* store the state: */
  582. movdqu STATE4, 0x00(STATEP)
  583. movdqu STATE0, 0x10(STATEP)
  584. movdqu STATE1, 0x20(STATEP)
  585. movdqu STATE2, 0x30(STATEP)
  586. movdqu STATE3, 0x40(STATEP)
  587. FRAME_END
  588. RET
  589. SYM_FUNC_END(crypto_aegis128_aesni_dec_tail)
  590. /*
  591. * void crypto_aegis128_aesni_final(void *state, void *tag_xor,
  592. * u64 assoclen, u64 cryptlen);
  593. */
  594. SYM_FUNC_START(crypto_aegis128_aesni_final)
  595. FRAME_BEGIN
  596. /* load the state: */
  597. movdqu 0x00(STATEP), STATE0
  598. movdqu 0x10(STATEP), STATE1
  599. movdqu 0x20(STATEP), STATE2
  600. movdqu 0x30(STATEP), STATE3
  601. movdqu 0x40(STATEP), STATE4
  602. /* prepare length block: */
  603. movq %rdx, MSG
  604. movq %rcx, T0
  605. pslldq $8, T0
  606. pxor T0, MSG
  607. psllq $3, MSG /* multiply by 8 (to get bit count) */
  608. pxor STATE3, MSG
  609. /* update state: */
  610. aegis128_update; pxor MSG, STATE4
  611. aegis128_update; pxor MSG, STATE3
  612. aegis128_update; pxor MSG, STATE2
  613. aegis128_update; pxor MSG, STATE1
  614. aegis128_update; pxor MSG, STATE0
  615. aegis128_update; pxor MSG, STATE4
  616. aegis128_update; pxor MSG, STATE3
  617. /* xor tag: */
  618. movdqu (%rsi), MSG
  619. pxor STATE0, MSG
  620. pxor STATE1, MSG
  621. pxor STATE2, MSG
  622. pxor STATE3, MSG
  623. pxor STATE4, MSG
  624. movdqu MSG, (%rsi)
  625. FRAME_END
  626. RET
  627. SYM_FUNC_END(crypto_aegis128_aesni_final)