xor.h 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866
  1. /* SPDX-License-Identifier: GPL-2.0-or-later */
  2. /*
  3. * include/asm-alpha/xor.h
  4. *
  5. * Optimized RAID-5 checksumming functions for alpha EV5 and EV6
  6. */
  /*
   * Entry points implemented by the assembly block later in this header.
   *
   * xor_alpha_N operates on N buffers; each xor_alpha_prefetch_N twin has
   * exactly the same signature.  In every prototype `p1` is the only buffer
   * that is not const, and p2..pN are read-only.  All pointers are
   * __restrict-qualified, i.e. the caller promises the buffers do not
   * alias one another.
   *
   * NOTE(review): the assembly shifts its first argument register right by
   * six, so `bytes` is apparently consumed in whole 64-byte chunks --
   * confirm against the callers.
   */

  /* Two buffers. */
  void xor_alpha_2(unsigned long bytes,
                   unsigned long * __restrict p1,
                   const unsigned long * __restrict p2);
  void xor_alpha_prefetch_2(unsigned long bytes,
                            unsigned long * __restrict p1,
                            const unsigned long * __restrict p2);

  /* Three buffers. */
  void xor_alpha_3(unsigned long bytes,
                   unsigned long * __restrict p1,
                   const unsigned long * __restrict p2,
                   const unsigned long * __restrict p3);
  void xor_alpha_prefetch_3(unsigned long bytes,
                            unsigned long * __restrict p1,
                            const unsigned long * __restrict p2,
                            const unsigned long * __restrict p3);

  /* Four buffers. */
  void xor_alpha_4(unsigned long bytes,
                   unsigned long * __restrict p1,
                   const unsigned long * __restrict p2,
                   const unsigned long * __restrict p3,
                   const unsigned long * __restrict p4);
  void xor_alpha_prefetch_4(unsigned long bytes,
                            unsigned long * __restrict p1,
                            const unsigned long * __restrict p2,
                            const unsigned long * __restrict p3,
                            const unsigned long * __restrict p4);

  /* Five buffers. */
  void xor_alpha_5(unsigned long bytes,
                   unsigned long * __restrict p1,
                   const unsigned long * __restrict p2,
                   const unsigned long * __restrict p3,
                   const unsigned long * __restrict p4,
                   const unsigned long * __restrict p5);
  void xor_alpha_prefetch_5(unsigned long bytes,
                            unsigned long * __restrict p1,
                            const unsigned long * __restrict p2,
                            const unsigned long * __restrict p3,
                            const unsigned long * __restrict p4,
                            const unsigned long * __restrict p5);
  /*
   * Assembly bodies for the eight routines declared above:
   * xor_alpha_{2,3,4,5} and xor_alpha_prefetch_{2,3,4,5}.
   *
   * Shape shared by every routine:
   *  - "srl $16, 6, $16" turns the value in $16 into a count of 64-byte
   *    chunks; each loop iteration handles one chunk.
   *  - Inside the loop, eight quadwords (offsets 0..56) are loaded through
   *    each pointer register ($17, $18, ... up to $21 in the 5-operand
   *    forms), XORed together, and the result is stored back through $17.
   *  - Every pointer register is then advanced by 64, $16 is decremented
   *    and "bgt" branches back while it is still positive.  The test sits
   *    at the bottom of the loop, so the body runs at least once even if
   *    the shifted count is zero.
   *
   * The prefetch variants additionally issue "ldq $31, ..." loads at
   * offsets 0, 64, 128 and 192 of every pointer before entering the loop,
   * and at offset 256 on each iteration.
   *
   * The "# N cycles from $X load" remarks inside the string record the
   * distance between a load and its first use; the instruction order is
   * deliberate and should not be shuffled casually.
   *
   * The numeric local labels (2:, 3:, 4:, 5:) are reused by the plain and
   * prefetch variants; each "bgt $16,Nb" targets the nearest preceding
   * definition of that label.
   *
   * NOTE(review): $16..$21 are presumably bytes, p1..p5 under the Alpha
   * calling convention, and loads into $31 are presumably cache prefetch
   * hints whose result is discarded -- neither is spelled out in this
   * file; confirm against the ABI / architecture manual.
   */
  43. asm(" \n\
  44. .text \n\
  45. .align 3 \n\
  46. .ent xor_alpha_2 \n\
  47. xor_alpha_2: \n\
  48. .prologue 0 \n\
  49. srl $16, 6, $16 \n\
  50. .align 4 \n\
  51. 2: \n\
  52. ldq $0,0($17) \n\
  53. ldq $1,0($18) \n\
  54. ldq $2,8($17) \n\
  55. ldq $3,8($18) \n\
  56. \n\
  57. ldq $4,16($17) \n\
  58. ldq $5,16($18) \n\
  59. ldq $6,24($17) \n\
  60. ldq $7,24($18) \n\
  61. \n\
  62. ldq $19,32($17) \n\
  63. ldq $20,32($18) \n\
  64. ldq $21,40($17) \n\
  65. ldq $22,40($18) \n\
  66. \n\
  67. ldq $23,48($17) \n\
  68. ldq $24,48($18) \n\
  69. ldq $25,56($17) \n\
  70. xor $0,$1,$0 # 7 cycles from $1 load \n\
  71. \n\
  72. ldq $27,56($18) \n\
  73. xor $2,$3,$2 \n\
  74. stq $0,0($17) \n\
  75. xor $4,$5,$4 \n\
  76. \n\
  77. stq $2,8($17) \n\
  78. xor $6,$7,$6 \n\
  79. stq $4,16($17) \n\
  80. xor $19,$20,$19 \n\
  81. \n\
  82. stq $6,24($17) \n\
  83. xor $21,$22,$21 \n\
  84. stq $19,32($17) \n\
  85. xor $23,$24,$23 \n\
  86. \n\
  87. stq $21,40($17) \n\
  88. xor $25,$27,$25 \n\
  89. stq $23,48($17) \n\
  90. subq $16,1,$16 \n\
  91. \n\
  92. stq $25,56($17) \n\
  93. addq $17,64,$17 \n\
  94. addq $18,64,$18 \n\
  95. bgt $16,2b \n\
  96. \n\
  97. ret \n\
  98. .end xor_alpha_2 \n\
  99. \n\
  100. .align 3 \n\
  101. .ent xor_alpha_3 \n\
  102. xor_alpha_3: \n\
  103. .prologue 0 \n\
  104. srl $16, 6, $16 \n\
  105. .align 4 \n\
  106. 3: \n\
  107. ldq $0,0($17) \n\
  108. ldq $1,0($18) \n\
  109. ldq $2,0($19) \n\
  110. ldq $3,8($17) \n\
  111. \n\
  112. ldq $4,8($18) \n\
  113. ldq $6,16($17) \n\
  114. ldq $7,16($18) \n\
  115. ldq $21,24($17) \n\
  116. \n\
  117. ldq $22,24($18) \n\
  118. ldq $24,32($17) \n\
  119. ldq $25,32($18) \n\
  120. ldq $5,8($19) \n\
  121. \n\
  122. ldq $20,16($19) \n\
  123. ldq $23,24($19) \n\
  124. ldq $27,32($19) \n\
  125. nop \n\
  126. \n\
  127. xor $0,$1,$1 # 8 cycles from $0 load \n\
  128. xor $3,$4,$4 # 6 cycles from $4 load \n\
  129. xor $6,$7,$7 # 6 cycles from $7 load \n\
  130. xor $21,$22,$22 # 5 cycles from $22 load \n\
  131. \n\
  132. xor $1,$2,$2 # 9 cycles from $2 load \n\
  133. xor $24,$25,$25 # 5 cycles from $25 load \n\
  134. stq $2,0($17) \n\
  135. xor $4,$5,$5 # 6 cycles from $5 load \n\
  136. \n\
  137. stq $5,8($17) \n\
  138. xor $7,$20,$20 # 7 cycles from $20 load \n\
  139. stq $20,16($17) \n\
  140. xor $22,$23,$23 # 7 cycles from $23 load \n\
  141. \n\
  142. stq $23,24($17) \n\
  143. xor $25,$27,$27 # 7 cycles from $27 load \n\
  144. stq $27,32($17) \n\
  145. nop \n\
  146. \n\
  147. ldq $0,40($17) \n\
  148. ldq $1,40($18) \n\
  149. ldq $3,48($17) \n\
  150. ldq $4,48($18) \n\
  151. \n\
  152. ldq $6,56($17) \n\
  153. ldq $7,56($18) \n\
  154. ldq $2,40($19) \n\
  155. ldq $5,48($19) \n\
  156. \n\
  157. ldq $20,56($19) \n\
  158. xor $0,$1,$1 # 4 cycles from $1 load \n\
  159. xor $3,$4,$4 # 5 cycles from $4 load \n\
  160. xor $6,$7,$7 # 5 cycles from $7 load \n\
  161. \n\
  162. xor $1,$2,$2 # 4 cycles from $2 load \n\
  163. xor $4,$5,$5 # 5 cycles from $5 load \n\
  164. stq $2,40($17) \n\
  165. xor $7,$20,$20 # 4 cycles from $20 load \n\
  166. \n\
  167. stq $5,48($17) \n\
  168. subq $16,1,$16 \n\
  169. stq $20,56($17) \n\
  170. addq $19,64,$19 \n\
  171. \n\
  172. addq $18,64,$18 \n\
  173. addq $17,64,$17 \n\
  174. bgt $16,3b \n\
  175. ret \n\
  176. .end xor_alpha_3 \n\
  177. \n\
  178. .align 3 \n\
  179. .ent xor_alpha_4 \n\
  180. xor_alpha_4: \n\
  181. .prologue 0 \n\
  182. srl $16, 6, $16 \n\
  183. .align 4 \n\
  184. 4: \n\
  185. ldq $0,0($17) \n\
  186. ldq $1,0($18) \n\
  187. ldq $2,0($19) \n\
  188. ldq $3,0($20) \n\
  189. \n\
  190. ldq $4,8($17) \n\
  191. ldq $5,8($18) \n\
  192. ldq $6,8($19) \n\
  193. ldq $7,8($20) \n\
  194. \n\
  195. ldq $21,16($17) \n\
  196. ldq $22,16($18) \n\
  197. ldq $23,16($19) \n\
  198. ldq $24,16($20) \n\
  199. \n\
  200. ldq $25,24($17) \n\
  201. xor $0,$1,$1 # 6 cycles from $1 load \n\
  202. ldq $27,24($18) \n\
  203. xor $2,$3,$3 # 6 cycles from $3 load \n\
  204. \n\
  205. ldq $0,24($19) \n\
  206. xor $1,$3,$3 \n\
  207. ldq $1,24($20) \n\
  208. xor $4,$5,$5 # 7 cycles from $5 load \n\
  209. \n\
  210. stq $3,0($17) \n\
  211. xor $6,$7,$7 \n\
  212. xor $21,$22,$22 # 7 cycles from $22 load \n\
  213. xor $5,$7,$7 \n\
  214. \n\
  215. stq $7,8($17) \n\
  216. xor $23,$24,$24 # 7 cycles from $24 load \n\
  217. ldq $2,32($17) \n\
  218. xor $22,$24,$24 \n\
  219. \n\
  220. ldq $3,32($18) \n\
  221. ldq $4,32($19) \n\
  222. ldq $5,32($20) \n\
  223. xor $25,$27,$27 # 8 cycles from $27 load \n\
  224. \n\
  225. ldq $6,40($17) \n\
  226. ldq $7,40($18) \n\
  227. ldq $21,40($19) \n\
  228. ldq $22,40($20) \n\
  229. \n\
  230. stq $24,16($17) \n\
  231. xor $0,$1,$1 # 9 cycles from $1 load \n\
  232. xor $2,$3,$3 # 5 cycles from $3 load \n\
  233. xor $27,$1,$1 \n\
  234. \n\
  235. stq $1,24($17) \n\
  236. xor $4,$5,$5 # 5 cycles from $5 load \n\
  237. ldq $23,48($17) \n\
  238. ldq $24,48($18) \n\
  239. \n\
  240. ldq $25,48($19) \n\
  241. xor $3,$5,$5 \n\
  242. ldq $27,48($20) \n\
  243. ldq $0,56($17) \n\
  244. \n\
  245. ldq $1,56($18) \n\
  246. ldq $2,56($19) \n\
  247. xor $6,$7,$7 # 8 cycles from $6 load \n\
  248. ldq $3,56($20) \n\
  249. \n\
  250. stq $5,32($17) \n\
  251. xor $21,$22,$22 # 8 cycles from $22 load \n\
  252. xor $7,$22,$22 \n\
  253. xor $23,$24,$24 # 5 cycles from $24 load \n\
  254. \n\
  255. stq $22,40($17) \n\
  256. xor $25,$27,$27 # 5 cycles from $27 load \n\
  257. xor $24,$27,$27 \n\
  258. xor $0,$1,$1 # 5 cycles from $1 load \n\
  259. \n\
  260. stq $27,48($17) \n\
  261. xor $2,$3,$3 # 4 cycles from $3 load \n\
  262. xor $1,$3,$3 \n\
  263. subq $16,1,$16 \n\
  264. \n\
  265. stq $3,56($17) \n\
  266. addq $20,64,$20 \n\
  267. addq $19,64,$19 \n\
  268. addq $18,64,$18 \n\
  269. \n\
  270. addq $17,64,$17 \n\
  271. bgt $16,4b \n\
  272. ret \n\
  273. .end xor_alpha_4 \n\
  274. \n\
  275. .align 3 \n\
  276. .ent xor_alpha_5 \n\
  277. xor_alpha_5: \n\
  278. .prologue 0 \n\
  279. srl $16, 6, $16 \n\
  280. .align 4 \n\
  281. 5: \n\
  282. ldq $0,0($17) \n\
  283. ldq $1,0($18) \n\
  284. ldq $2,0($19) \n\
  285. ldq $3,0($20) \n\
  286. \n\
  287. ldq $4,0($21) \n\
  288. ldq $5,8($17) \n\
  289. ldq $6,8($18) \n\
  290. ldq $7,8($19) \n\
  291. \n\
  292. ldq $22,8($20) \n\
  293. ldq $23,8($21) \n\
  294. ldq $24,16($17) \n\
  295. ldq $25,16($18) \n\
  296. \n\
  297. ldq $27,16($19) \n\
  298. xor $0,$1,$1 # 6 cycles from $1 load \n\
  299. ldq $28,16($20) \n\
  300. xor $2,$3,$3 # 6 cycles from $3 load \n\
  301. \n\
  302. ldq $0,16($21) \n\
  303. xor $1,$3,$3 \n\
  304. ldq $1,24($17) \n\
  305. xor $3,$4,$4 # 7 cycles from $4 load \n\
  306. \n\
  307. stq $4,0($17) \n\
  308. xor $5,$6,$6 # 7 cycles from $6 load \n\
  309. xor $7,$22,$22 # 7 cycles from $22 load \n\
  310. xor $6,$23,$23 # 7 cycles from $23 load \n\
  311. \n\
  312. ldq $2,24($18) \n\
  313. xor $22,$23,$23 \n\
  314. ldq $3,24($19) \n\
  315. xor $24,$25,$25 # 8 cycles from $25 load \n\
  316. \n\
  317. stq $23,8($17) \n\
  318. xor $25,$27,$27 # 8 cycles from $27 load \n\
  319. ldq $4,24($20) \n\
  320. xor $28,$0,$0 # 7 cycles from $0 load \n\
  321. \n\
  322. ldq $5,24($21) \n\
  323. xor $27,$0,$0 \n\
  324. ldq $6,32($17) \n\
  325. ldq $7,32($18) \n\
  326. \n\
  327. stq $0,16($17) \n\
  328. xor $1,$2,$2 # 6 cycles from $2 load \n\
  329. ldq $22,32($19) \n\
  330. xor $3,$4,$4 # 4 cycles from $4 load \n\
  331. \n\
  332. ldq $23,32($20) \n\
  333. xor $2,$4,$4 \n\
  334. ldq $24,32($21) \n\
  335. ldq $25,40($17) \n\
  336. \n\
  337. ldq $27,40($18) \n\
  338. ldq $28,40($19) \n\
  339. ldq $0,40($20) \n\
  340. xor $4,$5,$5 # 7 cycles from $5 load \n\
  341. \n\
  342. stq $5,24($17) \n\
  343. xor $6,$7,$7 # 7 cycles from $7 load \n\
  344. ldq $1,40($21) \n\
  345. ldq $2,48($17) \n\
  346. \n\
  347. ldq $3,48($18) \n\
  348. xor $7,$22,$22 # 7 cycles from $22 load \n\
  349. ldq $4,48($19) \n\
  350. xor $23,$24,$24 # 6 cycles from $24 load \n\
  351. \n\
  352. ldq $5,48($20) \n\
  353. xor $22,$24,$24 \n\
  354. ldq $6,48($21) \n\
  355. xor $25,$27,$27 # 7 cycles from $27 load \n\
  356. \n\
  357. stq $24,32($17) \n\
  358. xor $27,$28,$28 # 8 cycles from $28 load \n\
  359. ldq $7,56($17) \n\
  360. xor $0,$1,$1 # 6 cycles from $1 load \n\
  361. \n\
  362. ldq $22,56($18) \n\
  363. ldq $23,56($19) \n\
  364. ldq $24,56($20) \n\
  365. ldq $25,56($21) \n\
  366. \n\
  367. xor $28,$1,$1 \n\
  368. xor $2,$3,$3 # 9 cycles from $3 load \n\
  369. xor $3,$4,$4 # 9 cycles from $4 load \n\
  370. xor $5,$6,$6 # 8 cycles from $6 load \n\
  371. \n\
  372. stq $1,40($17) \n\
  373. xor $4,$6,$6 \n\
  374. xor $7,$22,$22 # 7 cycles from $22 load \n\
  375. xor $23,$24,$24 # 6 cycles from $24 load \n\
  376. \n\
  377. stq $6,48($17) \n\
  378. xor $22,$24,$24 \n\
  379. subq $16,1,$16 \n\
  380. xor $24,$25,$25 # 8 cycles from $25 load \n\
  381. \n\
  382. stq $25,56($17) \n\
  383. addq $21,64,$21 \n\
  384. addq $20,64,$20 \n\
  385. addq $19,64,$19 \n\
  386. \n\
  387. addq $18,64,$18 \n\
  388. addq $17,64,$17 \n\
  389. bgt $16,5b \n\
  390. ret \n\
  391. .end xor_alpha_5 \n\
  392. \n\
  393. .align 3 \n\
  394. .ent xor_alpha_prefetch_2 \n\
  395. xor_alpha_prefetch_2: \n\
  396. .prologue 0 \n\
  397. srl $16, 6, $16 \n\
  398. \n\
  399. ldq $31, 0($17) \n\
  400. ldq $31, 0($18) \n\
  401. \n\
  402. ldq $31, 64($17) \n\
  403. ldq $31, 64($18) \n\
  404. \n\
  405. ldq $31, 128($17) \n\
  406. ldq $31, 128($18) \n\
  407. \n\
  408. ldq $31, 192($17) \n\
  409. ldq $31, 192($18) \n\
  410. .align 4 \n\
  411. 2: \n\
  412. ldq $0,0($17) \n\
  413. ldq $1,0($18) \n\
  414. ldq $2,8($17) \n\
  415. ldq $3,8($18) \n\
  416. \n\
  417. ldq $4,16($17) \n\
  418. ldq $5,16($18) \n\
  419. ldq $6,24($17) \n\
  420. ldq $7,24($18) \n\
  421. \n\
  422. ldq $19,32($17) \n\
  423. ldq $20,32($18) \n\
  424. ldq $21,40($17) \n\
  425. ldq $22,40($18) \n\
  426. \n\
  427. ldq $23,48($17) \n\
  428. ldq $24,48($18) \n\
  429. ldq $25,56($17) \n\
  430. ldq $27,56($18) \n\
  431. \n\
  432. ldq $31,256($17) \n\
  433. xor $0,$1,$0 # 8 cycles from $1 load \n\
  434. ldq $31,256($18) \n\
  435. xor $2,$3,$2 \n\
  436. \n\
  437. stq $0,0($17) \n\
  438. xor $4,$5,$4 \n\
  439. stq $2,8($17) \n\
  440. xor $6,$7,$6 \n\
  441. \n\
  442. stq $4,16($17) \n\
  443. xor $19,$20,$19 \n\
  444. stq $6,24($17) \n\
  445. xor $21,$22,$21 \n\
  446. \n\
  447. stq $19,32($17) \n\
  448. xor $23,$24,$23 \n\
  449. stq $21,40($17) \n\
  450. xor $25,$27,$25 \n\
  451. \n\
  452. stq $23,48($17) \n\
  453. subq $16,1,$16 \n\
  454. stq $25,56($17) \n\
  455. addq $17,64,$17 \n\
  456. \n\
  457. addq $18,64,$18 \n\
  458. bgt $16,2b \n\
  459. ret \n\
  460. .end xor_alpha_prefetch_2 \n\
  461. \n\
  462. .align 3 \n\
  463. .ent xor_alpha_prefetch_3 \n\
  464. xor_alpha_prefetch_3: \n\
  465. .prologue 0 \n\
  466. srl $16, 6, $16 \n\
  467. \n\
  468. ldq $31, 0($17) \n\
  469. ldq $31, 0($18) \n\
  470. ldq $31, 0($19) \n\
  471. \n\
  472. ldq $31, 64($17) \n\
  473. ldq $31, 64($18) \n\
  474. ldq $31, 64($19) \n\
  475. \n\
  476. ldq $31, 128($17) \n\
  477. ldq $31, 128($18) \n\
  478. ldq $31, 128($19) \n\
  479. \n\
  480. ldq $31, 192($17) \n\
  481. ldq $31, 192($18) \n\
  482. ldq $31, 192($19) \n\
  483. .align 4 \n\
  484. 3: \n\
  485. ldq $0,0($17) \n\
  486. ldq $1,0($18) \n\
  487. ldq $2,0($19) \n\
  488. ldq $3,8($17) \n\
  489. \n\
  490. ldq $4,8($18) \n\
  491. ldq $6,16($17) \n\
  492. ldq $7,16($18) \n\
  493. ldq $21,24($17) \n\
  494. \n\
  495. ldq $22,24($18) \n\
  496. ldq $24,32($17) \n\
  497. ldq $25,32($18) \n\
  498. ldq $5,8($19) \n\
  499. \n\
  500. ldq $20,16($19) \n\
  501. ldq $23,24($19) \n\
  502. ldq $27,32($19) \n\
  503. nop \n\
  504. \n\
  505. xor $0,$1,$1 # 8 cycles from $0 load \n\
  506. xor $3,$4,$4 # 7 cycles from $4 load \n\
  507. xor $6,$7,$7 # 6 cycles from $7 load \n\
  508. xor $21,$22,$22 # 5 cycles from $22 load \n\
  509. \n\
  510. xor $1,$2,$2 # 9 cycles from $2 load \n\
  511. xor $24,$25,$25 # 5 cycles from $25 load \n\
  512. stq $2,0($17) \n\
  513. xor $4,$5,$5 # 6 cycles from $5 load \n\
  514. \n\
  515. stq $5,8($17) \n\
  516. xor $7,$20,$20 # 7 cycles from $20 load \n\
  517. stq $20,16($17) \n\
  518. xor $22,$23,$23 # 7 cycles from $23 load \n\
  519. \n\
  520. stq $23,24($17) \n\
  521. xor $25,$27,$27 # 7 cycles from $27 load \n\
  522. stq $27,32($17) \n\
  523. nop \n\
  524. \n\
  525. ldq $0,40($17) \n\
  526. ldq $1,40($18) \n\
  527. ldq $3,48($17) \n\
  528. ldq $4,48($18) \n\
  529. \n\
  530. ldq $6,56($17) \n\
  531. ldq $7,56($18) \n\
  532. ldq $2,40($19) \n\
  533. ldq $5,48($19) \n\
  534. \n\
  535. ldq $20,56($19) \n\
  536. ldq $31,256($17) \n\
  537. ldq $31,256($18) \n\
  538. ldq $31,256($19) \n\
  539. \n\
  540. xor $0,$1,$1 # 6 cycles from $1 load \n\
  541. xor $3,$4,$4 # 5 cycles from $4 load \n\
  542. xor $6,$7,$7 # 5 cycles from $7 load \n\
  543. xor $1,$2,$2 # 4 cycles from $2 load \n\
  544. \n\
  545. xor $4,$5,$5 # 5 cycles from $5 load \n\
  546. xor $7,$20,$20 # 4 cycles from $20 load \n\
  547. stq $2,40($17) \n\
  548. subq $16,1,$16 \n\
  549. \n\
  550. stq $5,48($17) \n\
  551. addq $19,64,$19 \n\
  552. stq $20,56($17) \n\
  553. addq $18,64,$18 \n\
  554. \n\
  555. addq $17,64,$17 \n\
  556. bgt $16,3b \n\
  557. ret \n\
  558. .end xor_alpha_prefetch_3 \n\
  559. \n\
  560. .align 3 \n\
  561. .ent xor_alpha_prefetch_4 \n\
  562. xor_alpha_prefetch_4: \n\
  563. .prologue 0 \n\
  564. srl $16, 6, $16 \n\
  565. \n\
  566. ldq $31, 0($17) \n\
  567. ldq $31, 0($18) \n\
  568. ldq $31, 0($19) \n\
  569. ldq $31, 0($20) \n\
  570. \n\
  571. ldq $31, 64($17) \n\
  572. ldq $31, 64($18) \n\
  573. ldq $31, 64($19) \n\
  574. ldq $31, 64($20) \n\
  575. \n\
  576. ldq $31, 128($17) \n\
  577. ldq $31, 128($18) \n\
  578. ldq $31, 128($19) \n\
  579. ldq $31, 128($20) \n\
  580. \n\
  581. ldq $31, 192($17) \n\
  582. ldq $31, 192($18) \n\
  583. ldq $31, 192($19) \n\
  584. ldq $31, 192($20) \n\
  585. .align 4 \n\
  586. 4: \n\
  587. ldq $0,0($17) \n\
  588. ldq $1,0($18) \n\
  589. ldq $2,0($19) \n\
  590. ldq $3,0($20) \n\
  591. \n\
  592. ldq $4,8($17) \n\
  593. ldq $5,8($18) \n\
  594. ldq $6,8($19) \n\
  595. ldq $7,8($20) \n\
  596. \n\
  597. ldq $21,16($17) \n\
  598. ldq $22,16($18) \n\
  599. ldq $23,16($19) \n\
  600. ldq $24,16($20) \n\
  601. \n\
  602. ldq $25,24($17) \n\
  603. xor $0,$1,$1 # 6 cycles from $1 load \n\
  604. ldq $27,24($18) \n\
  605. xor $2,$3,$3 # 6 cycles from $3 load \n\
  606. \n\
  607. ldq $0,24($19) \n\
  608. xor $1,$3,$3 \n\
  609. ldq $1,24($20) \n\
  610. xor $4,$5,$5 # 7 cycles from $5 load \n\
  611. \n\
  612. stq $3,0($17) \n\
  613. xor $6,$7,$7 \n\
  614. xor $21,$22,$22 # 7 cycles from $22 load \n\
  615. xor $5,$7,$7 \n\
  616. \n\
  617. stq $7,8($17) \n\
  618. xor $23,$24,$24 # 7 cycles from $24 load \n\
  619. ldq $2,32($17) \n\
  620. xor $22,$24,$24 \n\
  621. \n\
  622. ldq $3,32($18) \n\
  623. ldq $4,32($19) \n\
  624. ldq $5,32($20) \n\
  625. xor $25,$27,$27 # 8 cycles from $27 load \n\
  626. \n\
  627. ldq $6,40($17) \n\
  628. ldq $7,40($18) \n\
  629. ldq $21,40($19) \n\
  630. ldq $22,40($20) \n\
  631. \n\
  632. stq $24,16($17) \n\
  633. xor $0,$1,$1 # 9 cycles from $1 load \n\
  634. xor $2,$3,$3 # 5 cycles from $3 load \n\
  635. xor $27,$1,$1 \n\
  636. \n\
  637. stq $1,24($17) \n\
  638. xor $4,$5,$5 # 5 cycles from $5 load \n\
  639. ldq $23,48($17) \n\
  640. xor $3,$5,$5 \n\
  641. \n\
  642. ldq $24,48($18) \n\
  643. ldq $25,48($19) \n\
  644. ldq $27,48($20) \n\
  645. ldq $0,56($17) \n\
  646. \n\
  647. ldq $1,56($18) \n\
  648. ldq $2,56($19) \n\
  649. ldq $3,56($20) \n\
  650. xor $6,$7,$7 # 8 cycles from $6 load \n\
  651. \n\
  652. ldq $31,256($17) \n\
  653. xor $21,$22,$22 # 8 cycles from $22 load \n\
  654. ldq $31,256($18) \n\
  655. xor $7,$22,$22 \n\
  656. \n\
  657. ldq $31,256($19) \n\
  658. xor $23,$24,$24 # 6 cycles from $24 load \n\
  659. ldq $31,256($20) \n\
  660. xor $25,$27,$27 # 6 cycles from $27 load \n\
  661. \n\
  662. stq $5,32($17) \n\
  663. xor $24,$27,$27 \n\
  664. xor $0,$1,$1 # 7 cycles from $1 load \n\
  665. xor $2,$3,$3 # 6 cycles from $3 load \n\
  666. \n\
  667. stq $22,40($17) \n\
  668. xor $1,$3,$3 \n\
  669. stq $27,48($17) \n\
  670. subq $16,1,$16 \n\
  671. \n\
  672. stq $3,56($17) \n\
  673. addq $20,64,$20 \n\
  674. addq $19,64,$19 \n\
  675. addq $18,64,$18 \n\
  676. \n\
  677. addq $17,64,$17 \n\
  678. bgt $16,4b \n\
  679. ret \n\
  680. .end xor_alpha_prefetch_4 \n\
  681. \n\
  682. .align 3 \n\
  683. .ent xor_alpha_prefetch_5 \n\
  684. xor_alpha_prefetch_5: \n\
  685. .prologue 0 \n\
  686. srl $16, 6, $16 \n\
  687. \n\
  688. ldq $31, 0($17) \n\
  689. ldq $31, 0($18) \n\
  690. ldq $31, 0($19) \n\
  691. ldq $31, 0($20) \n\
  692. ldq $31, 0($21) \n\
  693. \n\
  694. ldq $31, 64($17) \n\
  695. ldq $31, 64($18) \n\
  696. ldq $31, 64($19) \n\
  697. ldq $31, 64($20) \n\
  698. ldq $31, 64($21) \n\
  699. \n\
  700. ldq $31, 128($17) \n\
  701. ldq $31, 128($18) \n\
  702. ldq $31, 128($19) \n\
  703. ldq $31, 128($20) \n\
  704. ldq $31, 128($21) \n\
  705. \n\
  706. ldq $31, 192($17) \n\
  707. ldq $31, 192($18) \n\
  708. ldq $31, 192($19) \n\
  709. ldq $31, 192($20) \n\
  710. ldq $31, 192($21) \n\
  711. .align 4 \n\
  712. 5: \n\
  713. ldq $0,0($17) \n\
  714. ldq $1,0($18) \n\
  715. ldq $2,0($19) \n\
  716. ldq $3,0($20) \n\
  717. \n\
  718. ldq $4,0($21) \n\
  719. ldq $5,8($17) \n\
  720. ldq $6,8($18) \n\
  721. ldq $7,8($19) \n\
  722. \n\
  723. ldq $22,8($20) \n\
  724. ldq $23,8($21) \n\
  725. ldq $24,16($17) \n\
  726. ldq $25,16($18) \n\
  727. \n\
  728. ldq $27,16($19) \n\
  729. xor $0,$1,$1 # 6 cycles from $1 load \n\
  730. ldq $28,16($20) \n\
  731. xor $2,$3,$3 # 6 cycles from $3 load \n\
  732. \n\
  733. ldq $0,16($21) \n\
  734. xor $1,$3,$3 \n\
  735. ldq $1,24($17) \n\
  736. xor $3,$4,$4 # 7 cycles from $4 load \n\
  737. \n\
  738. stq $4,0($17) \n\
  739. xor $5,$6,$6 # 7 cycles from $6 load \n\
  740. xor $7,$22,$22 # 7 cycles from $22 load \n\
  741. xor $6,$23,$23 # 7 cycles from $23 load \n\
  742. \n\
  743. ldq $2,24($18) \n\
  744. xor $22,$23,$23 \n\
  745. ldq $3,24($19) \n\
  746. xor $24,$25,$25 # 8 cycles from $25 load \n\
  747. \n\
  748. stq $23,8($17) \n\
  749. xor $25,$27,$27 # 8 cycles from $27 load \n\
  750. ldq $4,24($20) \n\
  751. xor $28,$0,$0 # 7 cycles from $0 load \n\
  752. \n\
  753. ldq $5,24($21) \n\
  754. xor $27,$0,$0 \n\
  755. ldq $6,32($17) \n\
  756. ldq $7,32($18) \n\
  757. \n\
  758. stq $0,16($17) \n\
  759. xor $1,$2,$2 # 6 cycles from $2 load \n\
  760. ldq $22,32($19) \n\
  761. xor $3,$4,$4 # 4 cycles from $4 load \n\
  762. \n\
  763. ldq $23,32($20) \n\
  764. xor $2,$4,$4 \n\
  765. ldq $24,32($21) \n\
  766. ldq $25,40($17) \n\
  767. \n\
  768. ldq $27,40($18) \n\
  769. ldq $28,40($19) \n\
  770. ldq $0,40($20) \n\
  771. xor $4,$5,$5 # 7 cycles from $5 load \n\
  772. \n\
  773. stq $5,24($17) \n\
  774. xor $6,$7,$7 # 7 cycles from $7 load \n\
  775. ldq $1,40($21) \n\
  776. ldq $2,48($17) \n\
  777. \n\
  778. ldq $3,48($18) \n\
  779. xor $7,$22,$22 # 7 cycles from $22 load \n\
  780. ldq $4,48($19) \n\
  781. xor $23,$24,$24 # 6 cycles from $24 load \n\
  782. \n\
  783. ldq $5,48($20) \n\
  784. xor $22,$24,$24 \n\
  785. ldq $6,48($21) \n\
  786. xor $25,$27,$27 # 7 cycles from $27 load \n\
  787. \n\
  788. stq $24,32($17) \n\
  789. xor $27,$28,$28 # 8 cycles from $28 load \n\
  790. ldq $7,56($17) \n\
  791. xor $0,$1,$1 # 6 cycles from $1 load \n\
  792. \n\
  793. ldq $22,56($18) \n\
  794. ldq $23,56($19) \n\
  795. ldq $24,56($20) \n\
  796. ldq $25,56($21) \n\
  797. \n\
  798. ldq $31,256($17) \n\
  799. xor $28,$1,$1 \n\
  800. ldq $31,256($18) \n\
  801. xor $2,$3,$3 # 9 cycles from $3 load \n\
  802. \n\
  803. ldq $31,256($19) \n\
  804. xor $3,$4,$4 # 9 cycles from $4 load \n\
  805. ldq $31,256($20) \n\
  806. xor $5,$6,$6 # 8 cycles from $6 load \n\
  807. \n\
  808. stq $1,40($17) \n\
  809. xor $4,$6,$6 \n\
  810. xor $7,$22,$22 # 7 cycles from $22 load \n\
  811. xor $23,$24,$24 # 6 cycles from $24 load \n\
  812. \n\
  813. stq $6,48($17) \n\
  814. xor $22,$24,$24 \n\
  815. ldq $31,256($21) \n\
  816. xor $24,$25,$25 # 8 cycles from $25 load \n\
  817. \n\
  818. stq $25,56($17) \n\
  819. subq $16,1,$16 \n\
  820. addq $21,64,$21 \n\
  821. addq $20,64,$20 \n\
  822. \n\
  823. addq $19,64,$19 \n\
  824. addq $18,64,$18 \n\
  825. addq $17,64,$17 \n\
  826. bgt $16,5b \n\
  827. \n\
  828. ret \n\
  829. .end xor_alpha_prefetch_5 \n\
  830. ");
  831. static struct xor_block_template xor_block_alpha = {
  832. .name = "alpha",
  833. .do_2 = xor_alpha_2,
  834. .do_3 = xor_alpha_3,
  835. .do_4 = xor_alpha_4,
  836. .do_5 = xor_alpha_5,
  837. };
  838. static struct xor_block_template xor_block_alpha_prefetch = {
  839. .name = "alpha prefetch",
  840. .do_2 = xor_alpha_prefetch_2,
  841. .do_3 = xor_alpha_prefetch_3,
  842. .do_4 = xor_alpha_prefetch_4,
  843. .do_5 = xor_alpha_prefetch_5,
  844. };
  845. /* For grins, also test the generic routines. */
  846. #include <asm-generic/xor.h>
  /*
   * Discard any earlier XOR_TRY_TEMPLATES definition and install the Alpha
   * list.  xor_speed() is invoked on four templates, in this order: the two
   * generic ones (8regs, 32regs), then the plain Alpha template, then the
   * prefetching Alpha template.  Wrapped in do { } while (0) so the macro
   * behaves as a single statement.
   */
  #undef XOR_TRY_TEMPLATES
  #define XOR_TRY_TEMPLATES                                       \
          do {                                                    \
                  xor_speed(&xor_block_8regs);                    \
                  xor_speed(&xor_block_32regs);                   \
                  xor_speed(&xor_block_alpha);                    \
                  xor_speed(&xor_block_alpha_prefetch);           \
          } while (0)
  /*
   * Pick the template to use once the speed test has produced FASTEST.
   *
   * When implver() reports IMPLVER_EV6 the prefetching template is forced,
   * as it is significantly faster in the cold cache case; on any other
   * implementation the measured winner FASTEST is returned unchanged.
   *
   * FASTEST is parenthesised so that an argument containing a low-precedence
   * operator (e.g. an assignment) is still taken as the whole else-arm of
   * the conditional instead of being split by the ?: grammar.
   */
  #define XOR_SELECT_TEMPLATE(FASTEST) \
          (implver() == IMPLVER_EV6 ? &xor_block_alpha_prefetch : (FASTEST))