xor.h 14 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738
  1. /* SPDX-License-Identifier: GPL-2.0-or-later */
  2. /*
  3. * include/asm-generic/xor.h
  4. *
  5. * Generic optimized RAID-5 checksumming functions.
  6. */
  7. #include <linux/prefetch.h>
  /*
   * XOR the words at p2 into p1 in place, eight words (one "line") per pass.
   *
   * bytes is divided down to a whole number of 8-word lines; any remainder
   * is ignored.  The loop tests its counter at the bottom, so one full line
   * is processed even when that count works out to zero.
   */
  static void
  xor_8regs_2(unsigned long bytes, unsigned long * __restrict p1,
              const unsigned long * __restrict p2)
  {
      long remaining = bytes / (8 * sizeof(long));
      unsigned long base = 0;    /* word offset of the current line */

      do {
          p1[base + 0] ^= p2[base + 0];
          p1[base + 1] ^= p2[base + 1];
          p1[base + 2] ^= p2[base + 2];
          p1[base + 3] ^= p2[base + 3];
          p1[base + 4] ^= p2[base + 4];
          p1[base + 5] ^= p2[base + 5];
          p1[base + 6] ^= p2[base + 6];
          p1[base + 7] ^= p2[base + 7];
          base += 8;
      } while (--remaining > 0);
  }
  /*
   * XOR the words at p2 and p3 into p1 in place, eight words (one "line")
   * per pass.
   *
   * bytes is divided down to a whole number of 8-word lines; any remainder
   * is ignored.  The loop tests its counter at the bottom, so one full line
   * is processed even when that count works out to zero.
   */
  static void
  xor_8regs_3(unsigned long bytes, unsigned long * __restrict p1,
              const unsigned long * __restrict p2,
              const unsigned long * __restrict p3)
  {
      long remaining = bytes / (8 * sizeof(long));
      unsigned long base = 0;    /* word offset of the current line */

      do {
          p1[base + 0] ^= p2[base + 0] ^ p3[base + 0];
          p1[base + 1] ^= p2[base + 1] ^ p3[base + 1];
          p1[base + 2] ^= p2[base + 2] ^ p3[base + 2];
          p1[base + 3] ^= p2[base + 3] ^ p3[base + 3];
          p1[base + 4] ^= p2[base + 4] ^ p3[base + 4];
          p1[base + 5] ^= p2[base + 5] ^ p3[base + 5];
          p1[base + 6] ^= p2[base + 6] ^ p3[base + 6];
          p1[base + 7] ^= p2[base + 7] ^ p3[base + 7];
          base += 8;
      } while (--remaining > 0);
  }
  /*
   * XOR the words at p2, p3 and p4 into p1 in place, eight words (one
   * "line") per pass.
   *
   * bytes is divided down to a whole number of 8-word lines; any remainder
   * is ignored.  The loop tests its counter at the bottom, so one full line
   * is processed even when that count works out to zero.
   */
  static void
  xor_8regs_4(unsigned long bytes, unsigned long * __restrict p1,
              const unsigned long * __restrict p2,
              const unsigned long * __restrict p3,
              const unsigned long * __restrict p4)
  {
      long remaining = bytes / (8 * sizeof(long));
      unsigned long base = 0;    /* word offset of the current line */

      do {
          p1[base + 0] ^= p2[base + 0] ^ p3[base + 0] ^ p4[base + 0];
          p1[base + 1] ^= p2[base + 1] ^ p3[base + 1] ^ p4[base + 1];
          p1[base + 2] ^= p2[base + 2] ^ p3[base + 2] ^ p4[base + 2];
          p1[base + 3] ^= p2[base + 3] ^ p3[base + 3] ^ p4[base + 3];
          p1[base + 4] ^= p2[base + 4] ^ p3[base + 4] ^ p4[base + 4];
          p1[base + 5] ^= p2[base + 5] ^ p3[base + 5] ^ p4[base + 5];
          p1[base + 6] ^= p2[base + 6] ^ p3[base + 6] ^ p4[base + 6];
          p1[base + 7] ^= p2[base + 7] ^ p3[base + 7] ^ p4[base + 7];
          base += 8;
      } while (--remaining > 0);
  }
  /*
   * XOR the words at p2, p3, p4 and p5 into p1 in place, eight words (one
   * "line") per pass.
   *
   * bytes is divided down to a whole number of 8-word lines; any remainder
   * is ignored.  The loop tests its counter at the bottom, so one full line
   * is processed even when that count works out to zero.
   */
  static void
  xor_8regs_5(unsigned long bytes, unsigned long * __restrict p1,
              const unsigned long * __restrict p2,
              const unsigned long * __restrict p3,
              const unsigned long * __restrict p4,
              const unsigned long * __restrict p5)
  {
      long remaining = bytes / (8 * sizeof(long));
      unsigned long base = 0;    /* word offset of the current line */

      do {
          p1[base + 0] ^= p2[base + 0] ^ p3[base + 0] ^ p4[base + 0] ^ p5[base + 0];
          p1[base + 1] ^= p2[base + 1] ^ p3[base + 1] ^ p4[base + 1] ^ p5[base + 1];
          p1[base + 2] ^= p2[base + 2] ^ p3[base + 2] ^ p4[base + 2] ^ p5[base + 2];
          p1[base + 3] ^= p2[base + 3] ^ p3[base + 3] ^ p4[base + 3] ^ p5[base + 3];
          p1[base + 4] ^= p2[base + 4] ^ p3[base + 4] ^ p4[base + 4] ^ p5[base + 4];
          p1[base + 5] ^= p2[base + 5] ^ p3[base + 5] ^ p4[base + 5] ^ p5[base + 5];
          p1[base + 6] ^= p2[base + 6] ^ p3[base + 6] ^ p4[base + 6] ^ p5[base + 6];
          p1[base + 7] ^= p2[base + 7] ^ p3[base + 7] ^ p4[base + 7] ^ p5[base + 7];
          base += 8;
      } while (--remaining > 0);
  }
  /*
   * XOR the words at p2 into p1, one 8-word line per pass.
   *
   * Each line of p1 is first loaded into eight temporaries, the matching
   * words of p2 are folded in, and the eight results are then stored back
   * as a group.
   *
   * bytes is divided down to a whole number of 8-word lines; any remainder
   * is ignored.  The loop tests its counter at the bottom, so one full line
   * is processed even when that count works out to zero.
   */
  static void
  xor_32regs_2(unsigned long bytes, unsigned long * __restrict p1,
               const unsigned long * __restrict p2)
  {
      long lines = bytes / (sizeof (long)) / 8;

      do {
          /*
           * Same unsigned type as the buffers: words with the top bit
           * set are held without any conversion to a signed type.
           */
          register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

          d0 = p1[0];    /* Pull the stuff into registers */
          d1 = p1[1];    /* ... in bursts, if possible. */
          d2 = p1[2];
          d3 = p1[3];
          d4 = p1[4];
          d5 = p1[5];
          d6 = p1[6];
          d7 = p1[7];
          d0 ^= p2[0];
          d1 ^= p2[1];
          d2 ^= p2[2];
          d3 ^= p2[3];
          d4 ^= p2[4];
          d5 ^= p2[5];
          d6 ^= p2[6];
          d7 ^= p2[7];
          p1[0] = d0;    /* Store the result (in bursts) */
          p1[1] = d1;
          p1[2] = d2;
          p1[3] = d3;
          p1[4] = d4;
          p1[5] = d5;
          p1[6] = d6;
          p1[7] = d7;
          p1 += 8;
          p2 += 8;
      } while (--lines > 0);
  }
  /*
   * XOR the words at p2 and p3 into p1, one 8-word line per pass.
   *
   * Each line of p1 is first loaded into eight temporaries, the matching
   * words of each source are folded in one source at a time, and the eight
   * results are then stored back as a group.
   *
   * bytes is divided down to a whole number of 8-word lines; any remainder
   * is ignored.  The loop tests its counter at the bottom, so one full line
   * is processed even when that count works out to zero.
   */
  static void
  xor_32regs_3(unsigned long bytes, unsigned long * __restrict p1,
               const unsigned long * __restrict p2,
               const unsigned long * __restrict p3)
  {
      long lines = bytes / (sizeof (long)) / 8;

      do {
          /*
           * Same unsigned type as the buffers: words with the top bit
           * set are held without any conversion to a signed type.
           */
          register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

          d0 = p1[0];    /* Pull the stuff into registers */
          d1 = p1[1];    /* ... in bursts, if possible. */
          d2 = p1[2];
          d3 = p1[3];
          d4 = p1[4];
          d5 = p1[5];
          d6 = p1[6];
          d7 = p1[7];
          d0 ^= p2[0];
          d1 ^= p2[1];
          d2 ^= p2[2];
          d3 ^= p2[3];
          d4 ^= p2[4];
          d5 ^= p2[5];
          d6 ^= p2[6];
          d7 ^= p2[7];
          d0 ^= p3[0];
          d1 ^= p3[1];
          d2 ^= p3[2];
          d3 ^= p3[3];
          d4 ^= p3[4];
          d5 ^= p3[5];
          d6 ^= p3[6];
          d7 ^= p3[7];
          p1[0] = d0;    /* Store the result (in bursts) */
          p1[1] = d1;
          p1[2] = d2;
          p1[3] = d3;
          p1[4] = d4;
          p1[5] = d5;
          p1[6] = d6;
          p1[7] = d7;
          p1 += 8;
          p2 += 8;
          p3 += 8;
      } while (--lines > 0);
  }
  /*
   * XOR the words at p2, p3 and p4 into p1, one 8-word line per pass.
   *
   * Each line of p1 is first loaded into eight temporaries, the matching
   * words of each source are folded in one source at a time, and the eight
   * results are then stored back as a group.
   *
   * bytes is divided down to a whole number of 8-word lines; any remainder
   * is ignored.  The loop tests its counter at the bottom, so one full line
   * is processed even when that count works out to zero.
   */
  static void
  xor_32regs_4(unsigned long bytes, unsigned long * __restrict p1,
               const unsigned long * __restrict p2,
               const unsigned long * __restrict p3,
               const unsigned long * __restrict p4)
  {
      long lines = bytes / (sizeof (long)) / 8;

      do {
          /*
           * Same unsigned type as the buffers: words with the top bit
           * set are held without any conversion to a signed type.
           */
          register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

          d0 = p1[0];    /* Pull the stuff into registers */
          d1 = p1[1];    /* ... in bursts, if possible. */
          d2 = p1[2];
          d3 = p1[3];
          d4 = p1[4];
          d5 = p1[5];
          d6 = p1[6];
          d7 = p1[7];
          d0 ^= p2[0];
          d1 ^= p2[1];
          d2 ^= p2[2];
          d3 ^= p2[3];
          d4 ^= p2[4];
          d5 ^= p2[5];
          d6 ^= p2[6];
          d7 ^= p2[7];
          d0 ^= p3[0];
          d1 ^= p3[1];
          d2 ^= p3[2];
          d3 ^= p3[3];
          d4 ^= p3[4];
          d5 ^= p3[5];
          d6 ^= p3[6];
          d7 ^= p3[7];
          d0 ^= p4[0];
          d1 ^= p4[1];
          d2 ^= p4[2];
          d3 ^= p4[3];
          d4 ^= p4[4];
          d5 ^= p4[5];
          d6 ^= p4[6];
          d7 ^= p4[7];
          p1[0] = d0;    /* Store the result (in bursts) */
          p1[1] = d1;
          p1[2] = d2;
          p1[3] = d3;
          p1[4] = d4;
          p1[5] = d5;
          p1[6] = d6;
          p1[7] = d7;
          p1 += 8;
          p2 += 8;
          p3 += 8;
          p4 += 8;
      } while (--lines > 0);
  }
  /*
   * XOR the words at p2, p3, p4 and p5 into p1, one 8-word line per pass.
   *
   * Each line of p1 is first loaded into eight temporaries, the matching
   * words of each source are folded in one source at a time, and the eight
   * results are then stored back as a group.
   *
   * bytes is divided down to a whole number of 8-word lines; any remainder
   * is ignored.  The loop tests its counter at the bottom, so one full line
   * is processed even when that count works out to zero.
   */
  static void
  xor_32regs_5(unsigned long bytes, unsigned long * __restrict p1,
               const unsigned long * __restrict p2,
               const unsigned long * __restrict p3,
               const unsigned long * __restrict p4,
               const unsigned long * __restrict p5)
  {
      long lines = bytes / (sizeof (long)) / 8;

      do {
          /*
           * Same unsigned type as the buffers: words with the top bit
           * set are held without any conversion to a signed type.
           */
          register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

          d0 = p1[0];    /* Pull the stuff into registers */
          d1 = p1[1];    /* ... in bursts, if possible. */
          d2 = p1[2];
          d3 = p1[3];
          d4 = p1[4];
          d5 = p1[5];
          d6 = p1[6];
          d7 = p1[7];
          d0 ^= p2[0];
          d1 ^= p2[1];
          d2 ^= p2[2];
          d3 ^= p2[3];
          d4 ^= p2[4];
          d5 ^= p2[5];
          d6 ^= p2[6];
          d7 ^= p2[7];
          d0 ^= p3[0];
          d1 ^= p3[1];
          d2 ^= p3[2];
          d3 ^= p3[3];
          d4 ^= p3[4];
          d5 ^= p3[5];
          d6 ^= p3[6];
          d7 ^= p3[7];
          d0 ^= p4[0];
          d1 ^= p4[1];
          d2 ^= p4[2];
          d3 ^= p4[3];
          d4 ^= p4[4];
          d5 ^= p4[5];
          d6 ^= p4[6];
          d7 ^= p4[7];
          d0 ^= p5[0];
          d1 ^= p5[1];
          d2 ^= p5[2];
          d3 ^= p5[3];
          d4 ^= p5[4];
          d5 ^= p5[5];
          d6 ^= p5[6];
          d7 ^= p5[7];
          p1[0] = d0;    /* Store the result (in bursts) */
          p1[1] = d1;
          p1[2] = d2;
          p1[3] = d3;
          p1[4] = d4;
          p1[5] = d5;
          p1[6] = d6;
          p1[7] = d7;
          p1 += 8;
          p2 += 8;
          p3 += 8;
          p4 += 8;
          p5 += 8;
      } while (--lines > 0);
  }
  /*
   * Prefetching form of the in-place two-buffer XOR: p1 ^= p2, eight words
   * (one "line") per pass.
   *
   * The first line of each buffer is prefetched up front.  Every pass of
   * the loop then requests the following line (eight words ahead) before
   * XORing the current one.  The counter starts one short of the line
   * count, so when it reaches exactly zero one line is still outstanding;
   * the goto re-enters the loop body below the prefetch calls to handle
   * that final line without prefetching again.
   *
   * prefetch()/prefetchw() are provided by <linux/prefetch.h>.
   */
  static void
  xor_8regs_p_2(unsigned long bytes, unsigned long * __restrict p1,
                const unsigned long * __restrict p2)
  {
      long remaining = bytes / (8 * sizeof(long)) - 1;
      unsigned long base = 0;    /* word offset of the current line */

      prefetchw(p1);
      prefetch(p2);

      do {
          prefetchw(p1 + base + 8);
          prefetch(p2 + base + 8);
  final_line:
          p1[base + 0] ^= p2[base + 0];
          p1[base + 1] ^= p2[base + 1];
          p1[base + 2] ^= p2[base + 2];
          p1[base + 3] ^= p2[base + 3];
          p1[base + 4] ^= p2[base + 4];
          p1[base + 5] ^= p2[base + 5];
          p1[base + 6] ^= p2[base + 6];
          p1[base + 7] ^= p2[base + 7];
          base += 8;
      } while (--remaining > 0);

      if (remaining == 0)
          goto final_line;
  }
  /*
   * Prefetching form of the in-place three-buffer XOR: p1 ^= p2 ^ p3,
   * eight words (one "line") per pass.
   *
   * The first line of each buffer is prefetched up front.  Every pass of
   * the loop then requests the following line (eight words ahead) before
   * XORing the current one.  The counter starts one short of the line
   * count, so when it reaches exactly zero one line is still outstanding;
   * the goto re-enters the loop body below the prefetch calls to handle
   * that final line without prefetching again.
   *
   * prefetch()/prefetchw() are provided by <linux/prefetch.h>.
   */
  static void
  xor_8regs_p_3(unsigned long bytes, unsigned long * __restrict p1,
                const unsigned long * __restrict p2,
                const unsigned long * __restrict p3)
  {
      long remaining = bytes / (8 * sizeof(long)) - 1;
      unsigned long base = 0;    /* word offset of the current line */

      prefetchw(p1);
      prefetch(p2);
      prefetch(p3);

      do {
          prefetchw(p1 + base + 8);
          prefetch(p2 + base + 8);
          prefetch(p3 + base + 8);
  final_line:
          p1[base + 0] ^= p2[base + 0] ^ p3[base + 0];
          p1[base + 1] ^= p2[base + 1] ^ p3[base + 1];
          p1[base + 2] ^= p2[base + 2] ^ p3[base + 2];
          p1[base + 3] ^= p2[base + 3] ^ p3[base + 3];
          p1[base + 4] ^= p2[base + 4] ^ p3[base + 4];
          p1[base + 5] ^= p2[base + 5] ^ p3[base + 5];
          p1[base + 6] ^= p2[base + 6] ^ p3[base + 6];
          p1[base + 7] ^= p2[base + 7] ^ p3[base + 7];
          base += 8;
      } while (--remaining > 0);

      if (remaining == 0)
          goto final_line;
  }
  /*
   * Prefetching form of the in-place four-buffer XOR:
   * p1 ^= p2 ^ p3 ^ p4, eight words (one "line") per pass.
   *
   * The first line of each buffer is prefetched up front.  Every pass of
   * the loop then requests the following line (eight words ahead) before
   * XORing the current one.  The counter starts one short of the line
   * count, so when it reaches exactly zero one line is still outstanding;
   * the goto re-enters the loop body below the prefetch calls to handle
   * that final line without prefetching again.
   *
   * prefetch()/prefetchw() are provided by <linux/prefetch.h>.
   */
  static void
  xor_8regs_p_4(unsigned long bytes, unsigned long * __restrict p1,
                const unsigned long * __restrict p2,
                const unsigned long * __restrict p3,
                const unsigned long * __restrict p4)
  {
      long remaining = bytes / (8 * sizeof(long)) - 1;
      unsigned long base = 0;    /* word offset of the current line */

      prefetchw(p1);
      prefetch(p2);
      prefetch(p3);
      prefetch(p4);

      do {
          prefetchw(p1 + base + 8);
          prefetch(p2 + base + 8);
          prefetch(p3 + base + 8);
          prefetch(p4 + base + 8);
  final_line:
          p1[base + 0] ^= p2[base + 0] ^ p3[base + 0] ^ p4[base + 0];
          p1[base + 1] ^= p2[base + 1] ^ p3[base + 1] ^ p4[base + 1];
          p1[base + 2] ^= p2[base + 2] ^ p3[base + 2] ^ p4[base + 2];
          p1[base + 3] ^= p2[base + 3] ^ p3[base + 3] ^ p4[base + 3];
          p1[base + 4] ^= p2[base + 4] ^ p3[base + 4] ^ p4[base + 4];
          p1[base + 5] ^= p2[base + 5] ^ p3[base + 5] ^ p4[base + 5];
          p1[base + 6] ^= p2[base + 6] ^ p3[base + 6] ^ p4[base + 6];
          p1[base + 7] ^= p2[base + 7] ^ p3[base + 7] ^ p4[base + 7];
          base += 8;
      } while (--remaining > 0);

      if (remaining == 0)
          goto final_line;
  }
  /*
   * Prefetching form of the in-place five-buffer XOR:
   * p1 ^= p2 ^ p3 ^ p4 ^ p5, eight words (one "line") per pass.
   *
   * The first line of each buffer is prefetched up front.  Every pass of
   * the loop then requests the following line (eight words ahead) before
   * XORing the current one.  The counter starts one short of the line
   * count, so when it reaches exactly zero one line is still outstanding;
   * the goto re-enters the loop body below the prefetch calls to handle
   * that final line without prefetching again.
   *
   * prefetch()/prefetchw() are provided by <linux/prefetch.h>.
   */
  static void
  xor_8regs_p_5(unsigned long bytes, unsigned long * __restrict p1,
                const unsigned long * __restrict p2,
                const unsigned long * __restrict p3,
                const unsigned long * __restrict p4,
                const unsigned long * __restrict p5)
  {
      long remaining = bytes / (8 * sizeof(long)) - 1;
      unsigned long base = 0;    /* word offset of the current line */

      prefetchw(p1);
      prefetch(p2);
      prefetch(p3);
      prefetch(p4);
      prefetch(p5);

      do {
          prefetchw(p1 + base + 8);
          prefetch(p2 + base + 8);
          prefetch(p3 + base + 8);
          prefetch(p4 + base + 8);
          prefetch(p5 + base + 8);
  final_line:
          p1[base + 0] ^= p2[base + 0] ^ p3[base + 0] ^ p4[base + 0] ^ p5[base + 0];
          p1[base + 1] ^= p2[base + 1] ^ p3[base + 1] ^ p4[base + 1] ^ p5[base + 1];
          p1[base + 2] ^= p2[base + 2] ^ p3[base + 2] ^ p4[base + 2] ^ p5[base + 2];
          p1[base + 3] ^= p2[base + 3] ^ p3[base + 3] ^ p4[base + 3] ^ p5[base + 3];
          p1[base + 4] ^= p2[base + 4] ^ p3[base + 4] ^ p4[base + 4] ^ p5[base + 4];
          p1[base + 5] ^= p2[base + 5] ^ p3[base + 5] ^ p4[base + 5] ^ p5[base + 5];
          p1[base + 6] ^= p2[base + 6] ^ p3[base + 6] ^ p4[base + 6] ^ p5[base + 6];
          p1[base + 7] ^= p2[base + 7] ^ p3[base + 7] ^ p4[base + 7] ^ p5[base + 7];
          base += 8;
      } while (--remaining > 0);

      if (remaining == 0)
          goto final_line;
  }
  /*
   * Prefetching form of xor_32regs_2: XOR the words at p2 into p1, one
   * 8-word line per pass, staging each line through eight temporaries.
   *
   * The first line of each buffer is prefetched up front.  Every pass of
   * the loop then requests the following line (eight words ahead) before
   * working on the current one.  The counter starts one short of the line
   * count, so when it reaches exactly zero one line is still outstanding;
   * the goto re-enters the loop body at once_more, below the prefetch
   * calls, to handle that final line without prefetching again.
   *
   * prefetch()/prefetchw() are provided by <linux/prefetch.h>.
   */
  static void
  xor_32regs_p_2(unsigned long bytes, unsigned long * __restrict p1,
                 const unsigned long * __restrict p2)
  {
      long lines = bytes / (sizeof (long)) / 8 - 1;
      /*
       * Declared at function scope so the goto below does not jump past
       * the declaration, and unsigned like the buffers so words with the
       * top bit set are held without conversion to a signed type.
       */
      register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

      prefetchw(p1);
      prefetch(p2);

      do {
          prefetchw(p1+8);
          prefetch(p2+8);
  once_more:
          d0 = p1[0];    /* Pull the stuff into registers */
          d1 = p1[1];    /* ... in bursts, if possible. */
          d2 = p1[2];
          d3 = p1[3];
          d4 = p1[4];
          d5 = p1[5];
          d6 = p1[6];
          d7 = p1[7];
          d0 ^= p2[0];
          d1 ^= p2[1];
          d2 ^= p2[2];
          d3 ^= p2[3];
          d4 ^= p2[4];
          d5 ^= p2[5];
          d6 ^= p2[6];
          d7 ^= p2[7];
          p1[0] = d0;    /* Store the result (in bursts) */
          p1[1] = d1;
          p1[2] = d2;
          p1[3] = d3;
          p1[4] = d4;
          p1[5] = d5;
          p1[6] = d6;
          p1[7] = d7;
          p1 += 8;
          p2 += 8;
      } while (--lines > 0);

      if (lines == 0)
          goto once_more;
  }
  /*
   * Prefetching form of xor_32regs_3: XOR the words at p2 and p3 into p1,
   * one 8-word line per pass, staging each line through eight temporaries.
   *
   * The first line of each buffer is prefetched up front.  Every pass of
   * the loop then requests the following line (eight words ahead) before
   * working on the current one.  The counter starts one short of the line
   * count, so when it reaches exactly zero one line is still outstanding;
   * the goto re-enters the loop body at once_more, below the prefetch
   * calls, to handle that final line without prefetching again.
   *
   * prefetch()/prefetchw() are provided by <linux/prefetch.h>.
   */
  static void
  xor_32regs_p_3(unsigned long bytes, unsigned long * __restrict p1,
                 const unsigned long * __restrict p2,
                 const unsigned long * __restrict p3)
  {
      long lines = bytes / (sizeof (long)) / 8 - 1;
      /*
       * Declared at function scope so the goto below does not jump past
       * the declaration, and unsigned like the buffers so words with the
       * top bit set are held without conversion to a signed type.
       */
      register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

      prefetchw(p1);
      prefetch(p2);
      prefetch(p3);

      do {
          prefetchw(p1+8);
          prefetch(p2+8);
          prefetch(p3+8);
  once_more:
          d0 = p1[0];    /* Pull the stuff into registers */
          d1 = p1[1];    /* ... in bursts, if possible. */
          d2 = p1[2];
          d3 = p1[3];
          d4 = p1[4];
          d5 = p1[5];
          d6 = p1[6];
          d7 = p1[7];
          d0 ^= p2[0];
          d1 ^= p2[1];
          d2 ^= p2[2];
          d3 ^= p2[3];
          d4 ^= p2[4];
          d5 ^= p2[5];
          d6 ^= p2[6];
          d7 ^= p2[7];
          d0 ^= p3[0];
          d1 ^= p3[1];
          d2 ^= p3[2];
          d3 ^= p3[3];
          d4 ^= p3[4];
          d5 ^= p3[5];
          d6 ^= p3[6];
          d7 ^= p3[7];
          p1[0] = d0;    /* Store the result (in bursts) */
          p1[1] = d1;
          p1[2] = d2;
          p1[3] = d3;
          p1[4] = d4;
          p1[5] = d5;
          p1[6] = d6;
          p1[7] = d7;
          p1 += 8;
          p2 += 8;
          p3 += 8;
      } while (--lines > 0);

      if (lines == 0)
          goto once_more;
  }
  /*
   * Prefetching form of xor_32regs_4: XOR the words at p2, p3 and p4 into
   * p1, one 8-word line per pass, staging each line through eight
   * temporaries.
   *
   * The first line of each buffer is prefetched up front.  Every pass of
   * the loop then requests the following line (eight words ahead) before
   * working on the current one.  The counter starts one short of the line
   * count, so when it reaches exactly zero one line is still outstanding;
   * the goto re-enters the loop body at once_more, below the prefetch
   * calls, to handle that final line without prefetching again.
   *
   * prefetch()/prefetchw() are provided by <linux/prefetch.h>.
   */
  static void
  xor_32regs_p_4(unsigned long bytes, unsigned long * __restrict p1,
                 const unsigned long * __restrict p2,
                 const unsigned long * __restrict p3,
                 const unsigned long * __restrict p4)
  {
      long lines = bytes / (sizeof (long)) / 8 - 1;
      /*
       * Declared at function scope so the goto below does not jump past
       * the declaration, and unsigned like the buffers so words with the
       * top bit set are held without conversion to a signed type.
       */
      register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

      prefetchw(p1);
      prefetch(p2);
      prefetch(p3);
      prefetch(p4);

      do {
          prefetchw(p1+8);
          prefetch(p2+8);
          prefetch(p3+8);
          prefetch(p4+8);
  once_more:
          d0 = p1[0];    /* Pull the stuff into registers */
          d1 = p1[1];    /* ... in bursts, if possible. */
          d2 = p1[2];
          d3 = p1[3];
          d4 = p1[4];
          d5 = p1[5];
          d6 = p1[6];
          d7 = p1[7];
          d0 ^= p2[0];
          d1 ^= p2[1];
          d2 ^= p2[2];
          d3 ^= p2[3];
          d4 ^= p2[4];
          d5 ^= p2[5];
          d6 ^= p2[6];
          d7 ^= p2[7];
          d0 ^= p3[0];
          d1 ^= p3[1];
          d2 ^= p3[2];
          d3 ^= p3[3];
          d4 ^= p3[4];
          d5 ^= p3[5];
          d6 ^= p3[6];
          d7 ^= p3[7];
          d0 ^= p4[0];
          d1 ^= p4[1];
          d2 ^= p4[2];
          d3 ^= p4[3];
          d4 ^= p4[4];
          d5 ^= p4[5];
          d6 ^= p4[6];
          d7 ^= p4[7];
          p1[0] = d0;    /* Store the result (in bursts) */
          p1[1] = d1;
          p1[2] = d2;
          p1[3] = d3;
          p1[4] = d4;
          p1[5] = d5;
          p1[6] = d6;
          p1[7] = d7;
          p1 += 8;
          p2 += 8;
          p3 += 8;
          p4 += 8;
      } while (--lines > 0);

      if (lines == 0)
          goto once_more;
  }
  /*
   * Prefetching form of xor_32regs_5: XOR the words at p2, p3, p4 and p5
   * into p1, one 8-word line per pass, staging each line through eight
   * temporaries.
   *
   * The first line of each buffer is prefetched up front.  Every pass of
   * the loop then requests the following line (eight words ahead) before
   * working on the current one.  The counter starts one short of the line
   * count, so when it reaches exactly zero one line is still outstanding;
   * the goto re-enters the loop body at once_more, below the prefetch
   * calls, to handle that final line without prefetching again.
   *
   * prefetch()/prefetchw() are provided by <linux/prefetch.h>.
   */
  static void
  xor_32regs_p_5(unsigned long bytes, unsigned long * __restrict p1,
                 const unsigned long * __restrict p2,
                 const unsigned long * __restrict p3,
                 const unsigned long * __restrict p4,
                 const unsigned long * __restrict p5)
  {
      long lines = bytes / (sizeof (long)) / 8 - 1;
      /*
       * Declared at function scope so the goto below does not jump past
       * the declaration, and unsigned like the buffers so words with the
       * top bit set are held without conversion to a signed type.
       */
      register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

      prefetchw(p1);
      prefetch(p2);
      prefetch(p3);
      prefetch(p4);
      prefetch(p5);

      do {
          prefetchw(p1+8);
          prefetch(p2+8);
          prefetch(p3+8);
          prefetch(p4+8);
          prefetch(p5+8);
  once_more:
          d0 = p1[0];    /* Pull the stuff into registers */
          d1 = p1[1];    /* ... in bursts, if possible. */
          d2 = p1[2];
          d3 = p1[3];
          d4 = p1[4];
          d5 = p1[5];
          d6 = p1[6];
          d7 = p1[7];
          d0 ^= p2[0];
          d1 ^= p2[1];
          d2 ^= p2[2];
          d3 ^= p2[3];
          d4 ^= p2[4];
          d5 ^= p2[5];
          d6 ^= p2[6];
          d7 ^= p2[7];
          d0 ^= p3[0];
          d1 ^= p3[1];
          d2 ^= p3[2];
          d3 ^= p3[3];
          d4 ^= p3[4];
          d5 ^= p3[5];
          d6 ^= p3[6];
          d7 ^= p3[7];
          d0 ^= p4[0];
          d1 ^= p4[1];
          d2 ^= p4[2];
          d3 ^= p4[3];
          d4 ^= p4[4];
          d5 ^= p4[5];
          d6 ^= p4[6];
          d7 ^= p4[7];
          d0 ^= p5[0];
          d1 ^= p5[1];
          d2 ^= p5[2];
          d3 ^= p5[3];
          d4 ^= p5[4];
          d5 ^= p5[5];
          d6 ^= p5[6];
          d7 ^= p5[7];
          p1[0] = d0;    /* Store the result (in bursts) */
          p1[1] = d1;
          p1[2] = d2;
          p1[3] = d3;
          p1[4] = d4;
          p1[5] = d5;
          p1[6] = d6;
          p1[7] = d7;
          p1 += 8;
          p2 += 8;
          p3 += 8;
          p4 += 8;
          p5 += 8;
      } while (--lines > 0);

      if (lines == 0)
          goto once_more;
  }
  656. static struct xor_block_template xor_block_8regs = {
  657. .name = "8regs",
  658. .do_2 = xor_8regs_2,
  659. .do_3 = xor_8regs_3,
  660. .do_4 = xor_8regs_4,
  661. .do_5 = xor_8regs_5,
  662. };
  663. static struct xor_block_template xor_block_32regs = {
  664. .name = "32regs",
  665. .do_2 = xor_32regs_2,
  666. .do_3 = xor_32regs_3,
  667. .do_4 = xor_32regs_4,
  668. .do_5 = xor_32regs_5,
  669. };
  670. static struct xor_block_template xor_block_8regs_p __maybe_unused = {
  671. .name = "8regs_prefetch",
  672. .do_2 = xor_8regs_p_2,
  673. .do_3 = xor_8regs_p_3,
  674. .do_4 = xor_8regs_p_4,
  675. .do_5 = xor_8regs_p_5,
  676. };
  677. static struct xor_block_template xor_block_32regs_p __maybe_unused = {
  678. .name = "32regs_prefetch",
  679. .do_2 = xor_32regs_p_2,
  680. .do_3 = xor_32regs_p_3,
  681. .do_4 = xor_32regs_p_4,
  682. .do_5 = xor_32regs_p_5,
  683. };
  /*
   * Pass each of the four generic templates to xor_speed(), in the order
   * listed.  xor_speed() is defined elsewhere; presumably it times the
   * template so the fastest can be chosen -- confirm against its definition.
   * Wrapped in do { } while (0) so the macro expands to a single statement.
   */
  #define XOR_TRY_TEMPLATES                       \
      do {                                        \
          xor_speed(&xor_block_8regs);            \
          xor_speed(&xor_block_8regs_p);          \
          xor_speed(&xor_block_32regs);           \
          xor_speed(&xor_block_32regs_p);         \
      } while (0)