/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * OpenRISC memset.S
 *
 * Hand-optimized assembler version of memset for OpenRISC.
 * Algorithm inspired by several other arch-specific memset routines
 * in the kernel tree.
 *
 * Copyright (C) 2015 Olof Kindgren <[email protected]>
 */
  .global memset
  .type   memset, @function
/*
 * memset - fill n bytes starting at s with the low byte of c.
 * C-equivalent (presumably): void *memset(void *s, int c, size_t n)
 *
 * In:    r3  = s (destination pointer)
 *        r4  = c (only bits 7:0 are used)
 *        r5  = n (byte count, compared as unsigned)
 * Out:   r11 = s
 * Temps: r13 = fill pattern, r15 = scratch,
 *        r17 = bytes remaining, r19 = write cursor
 * Returns through the address held in r9.
 *
 * The routine relies on branch delay slots: the instruction that follows
 * each l.bf / l.jr executes whether or not the branch is taken. Delay-slot
 * instructions are indented by one extra space.
 */
memset:
        /* Exit if n == 0 */
        l.sfeqi   r5, 0
        l.bf      .Lmemset_ret
         l.andi   r13, r4, 0xff         // delay slot: truncate c to char

        /* Skip word extension if c is 0 (r13 is already the full pattern) */
        l.sfeqi   r13, 0
        l.bf      .Lmemset_setup
         /*
          * Delay slot, so it runs on both paths: flag = (n <= 7), i.e.
          * fewer than two whole words. The l.bf at .Lmemset_setup is the
          * consumer of this compare.
          */
         l.sfleui r5, 7

        /* Extend char c to 32-bit word cccc in r13 */
        l.slli    r15, r13, 16          // r13 = 000c, r15 = 0c00
        l.or      r13, r13, r15         // r13 = 0c0c, r15 = 0c00
        l.slli    r15, r13, 8           // r13 = 0c0c, r15 = c0c0
        l.or      r13, r13, r15         // r13 = cccc, r15 = c0c0

.Lmemset_setup:
        l.addi    r19, r3, 0            // r19 = dst
        /* Jump to the byte loop if fewer than two words (n <= 7) */
        l.bf      .Lmemset_bytes
         l.or     r17, r5, r0           // delay slot: r17 = n

        /* Mask out two LSBs of dst to check alignment */
        l.andi    r15, r3, 0x3

        /* lsb == 00: already word aligned, jump to the word loop */
        l.sfeqi   r15, 0
        l.bf      .Lmemset_words
         l.addi   r19, r3, 0            // delay slot: r19 = dst

        /* lsb == 01, 10 or 11: at least one leading byte */
        l.sb      0(r3), r13            // dst[0] = c
        l.addi    r17, r17, -1          // n -= 1

        l.sfeqi   r15, 3
        l.bf      .Lmemset_words
         l.addi   r19, r3, 1            // delay slot: r19 = dst + 1

        /* lsb == 01 or 10: at least two leading bytes */
        l.sb      1(r3), r13            // dst[1] = c
        l.addi    r17, r17, -1          // n -= 1

        l.sfeqi   r15, 2
        l.bf      .Lmemset_words
         l.addi   r19, r3, 2            // delay slot: r19 = dst + 2

        /* lsb == 01: three leading bytes */
        l.sb      2(r3), r13            // dst[2] = c
        l.addi    r17, r17, -1          // n -= 1
        l.addi    r19, r3, 3            // r19 = dst + 3

        /*
         * Word loop. r19 is word aligned here and r17 >= 5, because this
         * path is only reached with n >= 8 and at most 3 bytes were
         * consumed above.
         */
.Lmemset_words:
        l.sw      0(r19), r13           // *(word *)r19 = cccc
        l.addi    r17, r17, -4          // n -= 4
        l.sfgeui  r17, 4                // another whole word left?
        l.bf      .Lmemset_words
         l.addi   r19, r19, 4           // delay slot: r19 += 4 (also on exit)

        /* When n > 0, store the remaining bytes, otherwise jump to exit */
        l.sfeqi   r17, 0
        l.bf      .Lmemset_ret
        /*
         * The first instruction of the byte loop doubles as the delay slot
         * of the branch above. When that branch is taken, the extra
         * decrement is harmless because r17 is not read again.
         */

        /* Byte loop: entered with r17 >= 1 */
.Lmemset_bytes:
        l.addi    r17, r17, -1          // n -= 1
        l.sb      0(r19), r13           // *r19 = c (byte store)
        l.sfnei   r17, 0
        l.bf      .Lmemset_bytes
         l.addi   r19, r19, 1           // delay slot: r19 += 1

.Lmemset_ret:
        l.jr      r9
         l.ori    r11, r3, 0            // delay slot: return value = s
  .size   memset, . - memset