strlen.S 1.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859
  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. /*
  3. * strlen.S (c) 1995 David Mosberger ([email protected])
  4. *
  5. * Finds length of a 0-terminated string. Optimized for the
  6. * Alpha architecture:
  7. *
  8. * - memory accessed as aligned quadwords only
  9. * - uses cmpbge to compare 8 bytes in parallel
  10. * - does binary search to find 0 byte in last
  11. * quadword (HAKMEM needed 12 instructions to
  12. * do this instead of the 9 instructions that
  13. * binary search needs).
  14. */
  15. #include <asm/export.h>
  .set    noreorder
  .set    noat
  .align  3

  .globl  strlen
  .ent    strlen

/*
 * strlen
 *
 *   In:       $16 = address of the string (any alignment)
 *   Out:      $0  = number of bytes in front of the first 0 byte
 *   Scratch:  $1, $2, $3
 *   Returns through $26.
 *
 * $0 walks the string one aligned quadword at a time.  Once a quadword
 * holding a 0 byte has been seen, $0 is moved forward to that byte and
 * the result is $0 - $16.
 *
 * The instruction order below is deliberate (see the nop in the scan
 * loop); keep it when editing.
 */
strlen:
        /*
         * Head: the first quadword may begin before the string does.
         * Force every byte in front of the start to 0xff so that it
         * cannot be taken for the terminator.
         */
        ldq_u   $1, 0($16)              # quadword that holds the first byte
        lda     $2, -1($31)             # $2 = all ones
        insqh   $2, $16, $2             # keep 0xff only in the ($16 & 7) low bytes
        andnot  $16, 7, $0              # $0 = $16 rounded down to a multiple of 8
        or      $2, $1, $1              # bytes ahead of the string now read non-zero
        cmpbge  $31, $1, $2             # $2 bit i set <==> byte i of $1 is 0
        bne     $2, $hit                # terminator is inside the first quadword

        /* Body: test eight bytes per iteration until a 0 byte shows up. */
$scan:  ldq     $1, 8($0)               # fetch the following quadword
        addq    $0, 8, $0               # $0 += 8, now the address just loaded
        nop                             # filler so the next two insns can dual issue
        cmpbge  $31, $1, $2             # same zero-byte bitmask as above
        beq     $2, $scan               # no bit set: no terminator here yet

        /*
         * Tail: $2 has one bit per 0 byte in the quadword at $0.
         * Reduce it to its lowest set bit, then binary search for the
         * position of that bit, adding 4, 2 and 1 to $0 as needed.
         */
$hit:   blbs    $2, $out                # bit 0 set: byte 0 is the terminator
        negq    $2, $3                  # $3 = -$2
        and     $2, $3, $2              # $2 & -$2 leaves only the lowest set bit

        and     $2, 0x0f, $1            # bit among positions 0..3 ?
        addq    $0, 4, $3
        cmoveq  $1, $3, $0              # no: terminator is in the upper half, $0 += 4

        and     $2, 0x33, $1            # bit among positions 0,1,4,5 ?
        addq    $0, 2, $3
        cmoveq  $1, $3, $0              # no: $0 += 2

        and     $2, 0x55, $1            # bit at an even position ?
        addq    $0, 1, $3
        cmoveq  $1, $3, $0              # no: $0 += 1

$out:   subq    $0, $16, $0             # length = address of terminator - start
        ret     $31, ($26)

  .end    strlen
  EXPORT_SYMBOL(strlen)