Changeset View
Changeset View
Standalone View
Standalone View
sys/amd64/amd64/support.S
Show First 20 Lines • Show All 691 Lines • ▼ Show 20 Lines | ENTRY(fillw) | ||||
movq %rsi,%rdi | movq %rsi,%rdi | ||||
movq %rdx,%rcx | movq %rdx,%rcx | ||||
rep | rep | ||||
stosw | stosw | ||||
POP_FRAME_POINTER | POP_FRAME_POINTER | ||||
ret | ret | ||||
END(fillw) | END(fillw) | ||||
/* | |||||
* strlen(string) | |||||
* %rdi | |||||
* | |||||
* Uses the ((x - 0x01....01) & ~x & 0x80....80) trick. | |||||
* | |||||
* 0x01....01 is replaced with 0x0 - 0x01....01 so that it can be added | |||||
* with leaq. | |||||
* | |||||
* For a description see either: | |||||
* - "Hacker's Delight" by Henry S. Warren, Jr. | |||||
* - "Optimizing subroutines in assembly language: An optimization guide for x86 platforms" | |||||
* by Agner Fog | |||||
* | |||||
* The latter contains a 32-bit variant of the same algorithm coded in assembly for i386. | |||||
*/ | |||||
/*
 * Register roles in this routine:
 *   %rdi  in: string pointer; afterwards the 8-byte-aligned read cursor
 *   %r10  copy of the original string pointer, used for the final subtraction
 *   %r8   0xfefefefefefefeff, i.e. 0 - 0x0101010101010101
 *   %r9   0x8080808080808080, the per-byte high-bit mask
 *   %r11  the quadword currently being examined
 *   %rcx  shift count for the head mask, then the zero-byte detection result
 *   %rdx  mask covering the bytes that precede an unaligned string start
 *   %rax  out: computed length
 * NOTE(review): ENTRY/END and the *_FRAME_POINTER macros are defined outside
 * this view; presumably they emit the symbol and optional frame setup.
 */
ENTRY(strlen) | |||||
PUSH_FRAME_POINTER | |||||
/* Adding %r8 is the same as subtracting 0x01....01 (mod 2^64). */
movabsq $0xfefefefefefefeff,%r8 | |||||
movabsq $0x8080808080808080,%r9 | |||||
/* Keep the original pointer; %rdi is modified below. */
movq %rdi,%r10 | |||||
movq %rdi,%rcx | |||||
/* Already 8-byte aligned: skip the head fixup and enter the loop at 2. */
testb $7,%dil | |||||
jz 2f | |||||
/* | |||||
* Handle misaligned reads: align to 8 and fill | |||||
* the spurious bytes. | |||||
*/ | |||||
/* Round the cursor down and load the quadword that contains the start. */
andq $~7,%rdi | |||||
movq (%rdi),%r11 | |||||
/*
 * %rcx = address * 8.  The shift below takes its count from %cl, and a
 * 64-bit shift only uses the low 6 bits of the count, so the effective
 * count is (address & 7) * 8: the number of bits that precede the string
 * inside the loaded quadword.
 */
shlq $3,%rcx | |||||
/* %rdx = 0xff in every byte before the string start, 0 elsewhere. */
movq $-1,%rdx | |||||
shlq %cl,%rdx | |||||
notq %rdx | |||||
/* Force those leading bytes non-zero so they cannot match the test. */
orq %rdx,%r11 | |||||
/* %rcx = (x - 0x01....01) & ~x & 0x80....80, with x = %r11. */
leaq (%r11,%r8),%rcx | |||||
notq %r11 | |||||
andq %r11,%rcx | |||||
andq %r9,%rcx | |||||
/* Non-zero result: the first quadword holds a zero byte. */
jnz 3f | |||||
/* Otherwise fall through to 1, which advances to the next quadword. */
/* | |||||
* Main loop. | |||||
*/ | |||||
/* NOTE(review): ALIGN_TEXT is defined elsewhere; presumably aligns the loop. */
ALIGN_TEXT | |||||
1: | |||||
/* Step the cursor to the next quadword. */
leaq 8(%rdi),%rdi | |||||
2: | |||||
/* Same zero-byte test as above, on a full aligned quadword. */
movq (%rdi),%r11 | |||||
leaq (%r11,%r8),%rcx | |||||
notq %r11 | |||||
andq %r11,%rcx | |||||
andq %r9,%rcx | |||||
/* The final andq sets ZF; zero means no zero byte here, keep scanning. */
jz 1b | |||||
3: | |||||
/*
 * bsfq yields the index of the lowest set bit in the detection result;
 * dividing by 8 turns it into a byte offset within the quadword.
 */
bsfq %rcx,%rcx | |||||
shrq $3,%rcx | |||||
/* length = (cursor + byte offset) - original pointer */
leaq (%rcx,%rdi),%rax | |||||
subq %r10,%rax | |||||
POP_FRAME_POINTER | |||||
ret | |||||
END(strlen) | |||||
/*****************************************************************************/ | /*****************************************************************************/ | ||||
/* copyout and fubyte family */ | /* copyout and fubyte family */ | ||||
/*****************************************************************************/ | /*****************************************************************************/ | ||||
/* | /* | ||||
* Access user memory from inside the kernel. These routines should be | * Access user memory from inside the kernel. These routines should be | ||||
* the only places that do this. | * the only places that do this. | ||||
* | * | ||||
* These routines set curpcb->pcb_onfault for the time they execute. When a | * These routines set curpcb->pcb_onfault for the time they execute. When a | ||||
▲ Show 20 Lines • Show All 1,201 Lines • Show Last 20 Lines |