Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F144373650
D17441.id49038.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
5 KB
Referenced Files
None
Subscribers
None
D17441.id49038.diff
View Options
Index: head/sys/amd64/amd64/support.S
===================================================================
--- head/sys/amd64/amd64/support.S
+++ head/sys/amd64/amd64/support.S
@@ -200,82 +200,236 @@
* Adapted from bcopy written by:
* ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
*/
-ENTRY(memmove_std)
- PUSH_FRAME_POINTER
- movq %rdi,%rax
- movq %rdx,%rcx
+/*
+ * Register state at entry is supposed to be as follows:
+ * rdi - destination
+ * rsi - source
+ * rdx - count
+ *
+ * The macro may clobber the registers listed above, as well as rcx and r8.
+ * It does not clobber rax, r10 or r11.
+ */
+.macro MEMMOVE erms overlap begin end
+ \begin
+.if \overlap == 1
movq %rdi,%r8
subq %rsi,%r8
- cmpq %rcx,%r8 /* overlapping && src < dst? */
+ cmpq %rcx,%r8 /* overlapping && src < dst? */
jb 2f
+.endif
- cmpq $15,%rcx
- jbe 1f
- shrq $3,%rcx /* copy by 64-bit words */
- rep
- movsq
- movq %rdx,%rcx
- andq $7,%rcx /* any bytes left? */
- jne 1f
- POP_FRAME_POINTER
+ cmpq $32,%rcx
+ jb 1016f
+
+ cmpq $256,%rcx
+ ja 1256f
+
+1032:
+ movq (%rsi),%rdx
+ movq %rdx,(%rdi)
+ movq 8(%rsi),%rdx
+ movq %rdx,8(%rdi)
+ movq 16(%rsi),%rdx
+ movq %rdx,16(%rdi)
+ movq 24(%rsi),%rdx
+ movq %rdx,24(%rdi)
+ leaq 32(%rsi),%rsi
+ leaq 32(%rdi),%rdi
+ subq $32,%rcx
+ cmpq $32,%rcx
+ jae 1032b
+ cmpb $0,%cl
+ jne 1016f
+ \end
ret
ALIGN_TEXT
-1:
+1016:
+ cmpb $16,%cl
+ jl 1008f
+ movq (%rsi),%rdx
+ movq %rdx,(%rdi)
+ movq 8(%rsi),%rdx
+ movq %rdx,8(%rdi)
+ subb $16,%cl
+ jz 1000f
+ leaq 16(%rsi),%rsi
+ leaq 16(%rdi),%rdi
+1008:
+ cmpb $8,%cl
+ jl 1004f
+ movq (%rsi),%rdx
+ movq %rdx,(%rdi)
+ subb $8,%cl
+ jz 1000f
+ leaq 8(%rsi),%rsi
+ leaq 8(%rdi),%rdi
+1004:
+ cmpb $4,%cl
+ jl 1002f
+ movl (%rsi),%edx
+ movl %edx,(%rdi)
+ subb $4,%cl
+ jz 1000f
+ leaq 4(%rsi),%rsi
+ leaq 4(%rdi),%rdi
+1002:
+ cmpb $2,%cl
+ jl 1001f
+ movw (%rsi),%dx
+ movw %dx,(%rdi)
+ subb $2,%cl
+ jz 1000f
+ leaq 2(%rsi),%rsi
+ leaq 2(%rdi),%rdi
+1001:
+ cmpb $1,%cl
+ jl 1000f
+ movb (%rsi),%dl
+ movb %dl,(%rdi)
+1000:
+ \end
+ ret
+
+ ALIGN_TEXT
+1256:
+.if \erms == 1
rep
movsb
- POP_FRAME_POINTER
+.else
+ shrq $3,%rcx /* copy by 64-bit words */
+ rep
+ movsq
+ movq %rdx,%rcx
+ andb $7,%cl /* any bytes left? */
+ jne 1004b
+.endif
+ \end
ret
- /* ALIGN_TEXT */
+.if \overlap == 1
+ /*
+ * Copy backwards.
+ */
+ ALIGN_TEXT
2:
- addq %rcx,%rdi /* copy backwards */
+ addq %rcx,%rdi
addq %rcx,%rsi
+
+ cmpq $32,%rcx
+ jb 2016f
+
+ cmpq $256,%rcx
+ ja 2256f
+
+2032:
+ movq -8(%rsi),%rdx
+ movq %rdx,-8(%rdi)
+ movq -16(%rsi),%rdx
+ movq %rdx,-16(%rdi)
+ movq -24(%rsi),%rdx
+ movq %rdx,-24(%rdi)
+ movq -32(%rsi),%rdx
+ movq %rdx,-32(%rdi)
+ leaq -32(%rsi),%rsi
+ leaq -32(%rdi),%rdi
+ subq $32,%rcx
+ cmpq $32,%rcx
+ jae 2032b
+ cmpb $0,%cl
+ jne 2016f
+ \end
+ ret
+ ALIGN_TEXT
+2016:
+ cmpb $16,%cl
+ jl 2008f
+ movq -8(%rsi),%rdx
+ movq %rdx,-8(%rdi)
+ movq -16(%rsi),%rdx
+ movq %rdx,-16(%rdi)
+ subb $16,%cl
+ jz 2000f
+ leaq -16(%rsi),%rsi
+ leaq -16(%rdi),%rdi
+2008:
+ cmpb $8,%cl
+ jl 2004f
+ movq -8(%rsi),%rdx
+ movq %rdx,-8(%rdi)
+ subb $8,%cl
+ jz 2000f
+ leaq -8(%rsi),%rsi
+ leaq -8(%rdi),%rdi
+2004:
+ cmpb $4,%cl
+ jl 2002f
+ movl -4(%rsi),%edx
+ movl %edx,-4(%rdi)
+ subb $4,%cl
+ jz 2000f
+ leaq -4(%rsi),%rsi
+ leaq -4(%rdi),%rdi
+2002:
+ cmpb $2,%cl
+ jl 2001f
+ movw -2(%rsi),%dx
+ movw %dx,-2(%rdi)
+ subb $2,%cl
+ jz 2000f
+ leaq -2(%rsi),%rsi
+ leaq -2(%rdi),%rdi
+2001:
+ cmpb $1,%cl
+ jl 2000f
+ movb -1(%rsi),%dl
+ movb %dl,-1(%rdi)
+2000:
+ \end
+ ret
+ ALIGN_TEXT
+2256:
decq %rdi
decq %rsi
std
- andq $7,%rcx /* any fractional bytes? */
+.if \erms == 1
+ rep
+ movsb
+.else
+ andq $7,%rcx /* any fractional bytes? */
je 3f
rep
movsb
3:
- movq %rdx,%rcx /* copy remainder by 32-bit words */
+ movq %rdx,%rcx /* copy remainder by 64-bit words */
shrq $3,%rcx
subq $7,%rsi
subq $7,%rdi
rep
movsq
+.endif
cld
- POP_FRAME_POINTER
+ \end
ret
-END(memmove_std)
+.endif
+.endm
-ENTRY(memmove_erms)
+.macro MEMMOVE_BEGIN
PUSH_FRAME_POINTER
movq %rdi,%rax
movq %rdx,%rcx
+.endm
- movq %rdi,%r8
- subq %rsi,%r8
- cmpq %rcx,%r8 /* overlapping && src < dst? */
- jb 1f
-
- rep
- movsb
+.macro MEMMOVE_END
POP_FRAME_POINTER
- ret
+.endm
-1:
- addq %rcx,%rdi /* copy backwards */
- addq %rcx,%rsi
- decq %rdi
- decq %rsi
- std
- rep
- movsb
- cld
- POP_FRAME_POINTER
- ret
+ENTRY(memmove_std)
+ MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
+END(memmove_std)
+
+ENTRY(memmove_erms)
+ MEMMOVE erms=1 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memmove_erms)
/*
@@ -285,35 +439,11 @@
* Note: memcpy does not support overlapping copies
*/
ENTRY(memcpy_std)
- PUSH_FRAME_POINTER
- movq %rdi,%rax
- movq %rdx,%rcx
- cmpq $15,%rcx
- jbe 1f
- shrq $3,%rcx /* copy by 64-bit words */
- rep
- movsq
- movq %rdx,%rcx
- andq $7,%rcx /* any bytes left? */
- jne 1f
- POP_FRAME_POINTER
- ret
- ALIGN_TEXT
-1:
- rep
- movsb
- POP_FRAME_POINTER
- ret
+ MEMMOVE erms=0 overlap=0 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memcpy_std)
ENTRY(memcpy_erms)
- PUSH_FRAME_POINTER
- movq %rdi,%rax
- movq %rdx,%rcx
- rep
- movsb
- POP_FRAME_POINTER
- ret
+ MEMMOVE erms=1 overlap=0 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memcpy_erms)
/*
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mon, Feb 9, 3:00 AM (9 h, 47 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
28528578
Default Alt Text
D17441.id49038.diff (5 KB)
Attached To
Mode
D17441: amd64: make memmove and memcpy less slow with mov
Attached
Detach File
Event Timeline
Log In to Comment