x86: atomic64 assembly improvements
In the "xchg" implementation, %ebx and %ecx don't need to be copied into %eax and %edx respectively (this is only necessary when desiring to only read the stored value). In the "add_unless" implementation, swapping the use of %ecx and %esi for passing arguments allows %esi to become an input only (i.e. permitting the register to be re-used to address the same object without reload). In "{add,sub}_return", doing the initial read64 through the passed in %ecx decreases a register dependency. In "inc_not_zero", a branch can be eliminated by or-ing together the two halves of the current (64-bit) value, and code size can be further reduced by adjusting the arithmetic slightly. v2: Undo the folding of "xchg" and "set". Signed-off-by: Jan Beulich <jbeulich@suse.com> Link: http://lkml.kernel.org/r/4F19A2BC020000780006E0DC@nat28.tlf.novell.com Cc: Luca Barbieri <luca@luca-barbieri.com> Cc: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
This commit is contained in:
@@ -55,8 +55,6 @@ ENDPROC(atomic64_set_cx8)
|
||||
ENTRY(atomic64_xchg_cx8)
|
||||
CFI_STARTPROC
|
||||
|
||||
movl %ebx, %eax
|
||||
movl %ecx, %edx
|
||||
1:
|
||||
LOCK_PREFIX
|
||||
cmpxchg8b (%esi)
|
||||
@@ -78,7 +76,7 @@ ENTRY(atomic64_\func\()_return_cx8)
|
||||
movl %edx, %edi
|
||||
movl %ecx, %ebp
|
||||
|
||||
read64 %ebp
|
||||
read64 %ecx
|
||||
1:
|
||||
movl %eax, %ebx
|
||||
movl %edx, %ecx
|
||||
@@ -159,23 +157,22 @@ ENTRY(atomic64_add_unless_cx8)
|
||||
SAVE ebx
|
||||
/* these just push these two parameters on the stack */
|
||||
SAVE edi
|
||||
SAVE esi
|
||||
SAVE ecx
|
||||
|
||||
movl %ecx, %ebp
|
||||
movl %eax, %esi
|
||||
movl %eax, %ebp
|
||||
movl %edx, %edi
|
||||
|
||||
read64 %ebp
|
||||
read64 %esi
|
||||
1:
|
||||
cmpl %eax, 0(%esp)
|
||||
je 4f
|
||||
2:
|
||||
movl %eax, %ebx
|
||||
movl %edx, %ecx
|
||||
addl %esi, %ebx
|
||||
addl %ebp, %ebx
|
||||
adcl %edi, %ecx
|
||||
LOCK_PREFIX
|
||||
cmpxchg8b (%ebp)
|
||||
cmpxchg8b (%esi)
|
||||
jne 1b
|
||||
|
||||
movl $1, %eax
|
||||
@@ -199,13 +196,13 @@ ENTRY(atomic64_inc_not_zero_cx8)
|
||||
|
||||
read64 %esi
|
||||
1:
|
||||
testl %eax, %eax
|
||||
je 4f
|
||||
2:
|
||||
movl %eax, %ecx
|
||||
orl %edx, %ecx
|
||||
jz 3f
|
||||
movl %eax, %ebx
|
||||
movl %edx, %ecx
|
||||
xorl %ecx, %ecx
|
||||
addl $1, %ebx
|
||||
adcl $0, %ecx
|
||||
adcl %edx, %ecx
|
||||
LOCK_PREFIX
|
||||
cmpxchg8b (%esi)
|
||||
jne 1b
|
||||
@@ -214,9 +211,5 @@ ENTRY(atomic64_inc_not_zero_cx8)
|
||||
3:
|
||||
RESTORE ebx
|
||||
ret
|
||||
4:
|
||||
testl %edx, %edx
|
||||
jne 2b
|
||||
jmp 3b
|
||||
CFI_ENDPROC
|
||||
ENDPROC(atomic64_inc_not_zero_cx8)
|
||||
|
Issues and references
Block user