123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139 |
- /* SPDX-License-Identifier: GPL-2.0-only */
- /*
- * Copyright (c) 2013-2021, Arm Limited.
- *
- * Adapted from the original at:
- * https://github.com/ARM-software/optimized-routines/blob/e823e3abf5f89ecb/string/aarch64/memcmp.S
- */
- #include <linux/linkage.h>
- #include <asm/assembler.h>
- /* Assumptions:
- *
- * ARMv8-a, AArch64, unaligned accesses.
- */
- #define L(label) .L ## label /* L(foo) pastes to .Lfoo, the name used for every branch target in this file */
- /* Parameters and result. */
- #define src1 x0 /* pointer to the first buffer; advanced as data is consumed */
- #define src2 x1 /* pointer to the second buffer; advanced in step with src1 */
- #define limit x2 /* byte count on entry; afterwards a biased "bytes remaining" counter */
- #define result w0 /* 32-bit return value; shares register 0 with src1 */
- /* Internal variables. */
- #define data1 x3 /* 8 bytes loaded from src1 (first half of an ldp pair) */
- #define data1w w3 /* 32-bit view of data1, used for the 4-byte and 1-byte loads */
- #define data1h x4 /* second 8 bytes of a 16-byte ldp from src1 */
- #define data2 x5 /* 8 bytes loaded from src2 (first half of an ldp pair) */
- #define data2w w5 /* 32-bit view of data2, used for the 4-byte and 1-byte loads */
- #define data2h x6 /* second 8 bytes of a 16-byte ldp from src2 */
- #define tmp1 x7 /* scratch: holds the misalignment of src1 (src1 & 15) */
- #define tmp2 x8 /* scratch: defined but not referenced by the code visible in this file */
- SYM_FUNC_START(__pi_memcmp) /* Compare `limit` bytes at src1 and src2.
-  *
-  * In:    x0 = src1, x1 = src2, x2 = limit (byte count).
-  * Out:   w0 = 0 if all bytes are equal. Otherwise negative if the first
-  *        differing byte of src1 is lower (unsigned) than that of src2,
-  *        positive if it is higher. The 8-byte and 4-byte paths return
-  *        exactly -1 or 1; the byte loop returns the byte difference.
-  * Clobb: x0-x7 and the condition flags. The stack is not used.
-  *
-  * Dispatch by length: 0-7 bytes go to L(less8); 8-16 bytes use two
-  * (possibly overlapping) 8-byte loads; 17-32 bytes use an 8-byte load
-  * plus L(last_bytes); longer buffers run L(loop16), with src1 aligned
-  * to 16 bytes first when more than 128 bytes are compared.
-  */
- subs limit, limit, 8 /* limit = length - 8; borrow means fewer than 8 bytes */
- b.lo L(less8)
- ldr data1, [src1], 8 /* compare bytes 0-7, post-incrementing both pointers */
- ldr data2, [src2], 8
- cmp data1, data2
- b.ne L(return)
- subs limit, limit, 8 /* limit = length - 16 */
- b.gt L(more16) /* signed test: taken when more than 16 bytes in total */
- ldr data1, [src1, limit] /* 8-16 bytes: limit is in [-8..0], so this loads the final 8 bytes */
- ldr data2, [src2, limit] /* (may overlap bytes already compared above) */
- b L(return) /* L(return) does the compare, including the equal case */
- L(more16):
- ldr data1, [src1], 8 /* compare bytes 8-15 */
- ldr data2, [src2], 8
- cmp data1, data2
- bne L(return)
- /* Jump directly to comparing the last 16 bytes for 32 byte (or less)
- strings. From here on limit = length - 32 and both pointers have
- advanced by 16, so src + limit == end of buffer - 16; every later
- adjustment of the pointers and limit keeps that relation. */
- subs limit, limit, 16
- b.ls L(last_bytes)
- /* We overlap loads between 0-32 bytes at either side of SRC1 when we
- try to align, so limit it only to strings larger than 128 bytes.
- (limit is length - 32 here, hence the comparison against 96.) */
- cmp limit, 96
- b.ls L(loop16)
- /* Align src1 and adjust src2 with bytes not yet done. Both pointers
- are moved back by the misalignment of src1 and limit is increased by
- the same amount, so up to 15 already-compared bytes are read again. */
- and tmp1, src1, 15
- add limit, limit, tmp1
- sub src1, src1, tmp1
- sub src2, src2, tmp1
- /* Loop performing 16 bytes per iteration using aligned src1.
- Limit is pre-decremented by 16 and must be larger than zero.
- Exit if <= 16 bytes left to do or if the data is not equal.
- src1 is only aligned when the alignment code above was executed;
- lengths of 33-128 bytes enter the loop with src1 unchanged. */
- .p2align 4 /* start the loop on a 16-byte boundary */
- L(loop16):
- ldp data1, data1h, [src1], 16
- ldp data2, data2h, [src2], 16
- subs limit, limit, 16
- ccmp data1, data2, 0, hi /* if limit was > 16: compare low halves; else force NE (NZCV = 0) */
- ccmp data1h, data2h, 0, eq /* if still equal: compare high halves; else force NE */
- b.eq L(loop16) /* continue only while data is equal and bytes remain */
- cmp data1, data2 /* loop ended: find out whether the last 16 bytes loaded differ */
- bne L(return)
- mov data1, data1h /* L(return) works on data1/data2, so move the high halves there */
- mov data2, data2h
- cmp data1, data2
- bne L(return)
- /* Compare last 1-16 bytes using unaligned access. limit is <= 0 here,
- and src + limit addresses the final 16 bytes of each buffer, so bytes
- that were already compared may be loaded again. */
- L(last_bytes):
- add src1, src1, limit
- add src2, src2, limit
- ldp data1, data1h, [src1]
- ldp data2, data2h, [src2]
- cmp data1, data2
- bne L(return)
- mov data1, data1h
- mov data2, data2h
- cmp data1, data2 /* flags are recomputed at L(return), which is reached by fall-through */
- /* Compare data bytes and set return value to 0, -1 or 1.
- Expects the words to compare in data1 and data2. */
- L(return):
- #ifndef __AARCH64EB__ /* little-endian only */
- rev data1, data1 /* byte-reverse so the lowest-addressed byte becomes most significant */
- rev data2, data2
- #endif
- cmp data1, data2 /* unsigned compare now orders by the first differing byte */
- L(ret_eq): /* also entered from L(less4) with the Z flag set and C set */
- cset result, ne /* result = 1 if different, 0 if equal */
- cneg result, result, lo /* negate to -1 when data1 is lower (unsigned) than data2 */
- ret
- .p2align 4
- /* Compare 0-7 bytes. Limit is [-8..-1] (length - 8). */
- L(less8):
- adds limit, limit, 4 /* limit = length - 4; no carry means fewer than 4 bytes */
- b.lo L(less4)
- ldr data1w, [src1], 4 /* compare bytes 0-3; the w-register load zero-extends into data1 */
- ldr data2w, [src2], 4
- cmp data1w, data2w
- b.ne L(return)
- sub limit, limit, 4 /* limit = length - 8, so the adds below yields the 0-3 bytes left */
- L(less4):
- adds limit, limit, 4 /* limit = number of bytes still to compare (0-3) */
- beq L(ret_eq) /* nothing left: the flags from adds make L(ret_eq) return 0 */
- L(byte_loop):
- ldrb data1w, [src1], 1
- ldrb data2w, [src2], 1
- subs limit, limit, 1
- ccmp data1w, data2w, 0, ne /* if bytes remain: compare this pair; else NZCV = 0b0000 (NE) to leave the loop */
- b.eq L(byte_loop)
- sub result, data1w, data2w /* difference of the last pair loaded; 0 when every byte matched */
- ret
- SYM_FUNC_END(__pi_memcmp)
- SYM_FUNC_ALIAS_WEAK(memcmp, __pi_memcmp) /* presumably makes memcmp a weak alias of __pi_memcmp; the macro is defined outside this file - confirm in linux/linkage.h */
- EXPORT_SYMBOL_NOKASAN(memcmp) /* NOTE(review): by its name, exports memcmp except in KASAN builds; the macro is defined outside this file - confirm */
|