diff --git a/lib/libc/amd64/amd64_archlevel.h b/lib/libc/amd64/amd64_archlevel.h deleted file mode 100644 index 047beb9855d1..000000000000 --- a/lib/libc/amd64/amd64_archlevel.h +++ /dev/null @@ -1,90 +0,0 @@ -/*- - * Copyright (c) 2023 The FreeBSD Foundation - * - * This software was developed by Robert Clausecker - * under sponsorship from the FreeBSD Foundation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE - */ - -/* must be macros so they can be accessed from assembly */ -#define X86_64_SCALAR 0 /* disable SIMD optimisations */ -#define X86_64_BASELINE 1 /* CMOV, CX8, FPU, FXSR, MMX, OSFXSR, SSE, SSE2 */ -#define X86_64_V2 2 /* CMPXCHG16B, LAHF-SAHF, POPCNT, SSE3, SSSE3, SSE4_1, SSE4_2 */ -#define X86_64_V3 3 /* AVX, AVX2, BMI1, BMI2, F16C, FMA, LZCNT, MOVBE, OSXSAVE */ -#define X86_64_V4 4 /* AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL */ - -#define X86_64_MAX X86_64_V4 /* highest supported architecture level */ -#define X86_64_UNDEFINED -1 /* architecture level not set yet */ - -#ifndef __ASSEMBLER__ -#include - -dlfunc_t __archlevel_resolve(u_int, u_int, u_int, u_int, - int32_t[X86_64_MAX + 1]) __hidden; -#else -#include - -#define ARCHRESOLVE(func) \ - .globl CNAME(func); \ - .type CNAME(func), @gnu_indirect_function; \ - .set CNAME(func), __CONCAT(func,_resolver); \ - ARCHENTRY(func, resolver); \ - lea __CONCAT(func,_funcs)(%rip), %r8; \ - jmp CNAME(__archlevel_resolve); \ - ARCHEND(func, resolver) - -/* - * The func_funcs array stores the location of the implementations - * as the distance from the func_funcs array to the function. Due - * to compiling for the medium code model, a 32 bit integer suffices - * to hold the distance. - * - * Doing it this way both saves storage and avoids giving rtld - * relocations to process at load time. - */ -#define ARCHFUNCS(func) \ - ARCHRESOLVE(func); \ - .section .rodata; \ - .align 4; \ - __CONCAT(func,_funcs): - -#define NOARCHFUNC \ - .4byte 0 - -#define ARCHFUNC(func, level) \ - .4byte __CONCAT(__CONCAT(func,_),level) - __CONCAT(func,_funcs) - -#define ENDARCHFUNCS(func) \ - .zero 4*(X86_64_MAX+1)-(.-__CONCAT(func,_funcs)); \ - .size __CONCAT(func,_funcs), .-__CONCAT(func,_funcs) - -#define ARCHENTRY(func, level) \ - _START_ENTRY; \ - .type __CONCAT(__CONCAT(func,_),level), @function; \ - __CONCAT(__CONCAT(func,_),level):; \ - .cfi_startproc - -#define ARCHEND(func, level) \ - END(__CONCAT(__CONCAT(func,_),level)) - -#endif /* __ASSEMBLER__ */ diff --git a/lib/libc/amd64/string/Makefile.inc b/lib/libc/amd64/string/Makefile.inc index 4df4ff8f1417..faa7c251ae3b 100644 --- a/lib/libc/amd64/string/Makefile.inc +++ b/lib/libc/amd64/string/Makefile.inc @@ -1,14 +1,11 @@ MDSRCS+= \ - amd64_archlevel.c \ bcmp.S \ memcmp.S \ memcpy.S \ memmove.S \ memset.S \ - stpcpy.S \ strcat.S \ - strchrnul.S \ strcmp.S \ strlen.S \ - strcpy.c + stpcpy.S diff --git a/lib/libc/amd64/string/amd64_archlevel.c b/lib/libc/amd64/string/amd64_archlevel.c deleted file mode 100644 index c06566658c59..000000000000 --- a/lib/libc/amd64/string/amd64_archlevel.c +++ /dev/null @@ -1,241 +0,0 @@ -/*- - * Copyright (c) 2023 The FreeBSD Foundation - * - * This software was developed by Robert Clausecker - * under sponsorship from the FreeBSD Foundation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE - */ - -#include - -#include -#include -#include - -#include -#include - -#include "amd64_archlevel.h" -#include "libc_private.h" - -#define ARCHLEVEL_ENV "ARCHLEVEL" - -static volatile int amd64_archlevel = X86_64_UNDEFINED; - -static const struct archlevel { - char name[10]; - /* CPUID feature bits that need to be present */ - u_int feat_edx, feat_ecx, amd_ecx, ext_ebx; -} levels[] = { - { - .name = "scalar", - .feat_edx = 0, - .feat_ecx = 0, - .amd_ecx = 0, - .ext_ebx = 0, - }, { -#define FEAT_EDX_BASELINE (CPUID_FPU | CPUID_CX8 | CPUID_CMOV | CPUID_MMX | \ - CPUID_FXSR | CPUID_SSE | CPUID_SSE2) - .name = "baseline", - .feat_edx = FEAT_EDX_BASELINE, - .feat_ecx = 0, - .amd_ecx = 0, - .ext_ebx = 0, - }, { -#define FEAT_ECX_V2 (CPUID2_SSE3 | CPUID2_SSSE3 | CPUID2_CX16 | CPUID2_SSE41 | \ - CPUID2_SSE42 | CPUID2_POPCNT) -#define AMD_ECX_V2 AMDID2_LAHF - .name = "x86-64-v2", - .feat_edx = FEAT_EDX_BASELINE, - .feat_ecx = FEAT_ECX_V2, - .amd_ecx = AMD_ECX_V2, - .ext_ebx = 0, - }, { -#define FEAT_ECX_V3 (FEAT_ECX_V2 | CPUID2_FMA | CPUID2_MOVBE | \ - CPUID2_OSXSAVE | CPUID2_AVX | CPUID2_F16C) -#define AMD_ECX_V3 (AMD_ECX_V2 | AMDID2_ABM) -#define EXT_EBX_V3 (CPUID_STDEXT_BMI1 | CPUID_STDEXT_AVX2 | CPUID_STDEXT_BMI2) - .name = "x86-64-v3", - .feat_edx = FEAT_EDX_BASELINE, - .feat_ecx = FEAT_ECX_V3, - .amd_ecx = AMD_ECX_V3, - .ext_ebx = EXT_EBX_V3, - }, { -#define EXT_EBX_V4 (EXT_EBX_V3 | CPUID_STDEXT_AVX512F | \ - CPUID_STDEXT_AVX512DQ | CPUID_STDEXT_AVX512CD | \ - CPUID_STDEXT_AVX512BW | CPUID_STDEXT_AVX512VL) - .name = "x86-64-v4", - .feat_edx = FEAT_EDX_BASELINE, - .feat_ecx = FEAT_ECX_V3, - .amd_ecx = AMD_ECX_V3, - .ext_ebx = EXT_EBX_V4, - } -}; - -static int -supported_archlevel(u_int feat_edx, u_int feat_ecx, u_int ext_ebx, u_int ext_ecx) -{ - int level; - u_int p[4], max_leaf; - u_int amd_ecx = 0; - - (void)ext_ecx; - - do_cpuid(0x80000000, p); - max_leaf = p[0]; - - if (max_leaf >= 0x80000001) { - do_cpuid(0x80000001, p); - amd_ecx = p[2]; - } - - for (level = X86_64_BASELINE; level <= X86_64_MAX; level++) { - const struct archlevel *lvl = &levels[level]; - - if ((lvl->feat_edx & feat_edx) != lvl->feat_edx || - (lvl->feat_ecx & feat_ecx) != lvl->feat_ecx || - (lvl->amd_ecx & amd_ecx) != lvl->amd_ecx || - (lvl->ext_ebx & ext_ebx) != lvl->ext_ebx) - return (level - 1); - } - - return (X86_64_MAX); -} - -static int -match_archlevel(const char *str, int *force) -{ - int level, want_force = 0; - - *force = 0; - - if (str[0] == '!') { - str++; - want_force = 1; - } - - for (level = 0; level <= X86_64_MAX; level++) { - size_t i; - const char *candidate = levels[level].name; - - /* can't use strcmp here: would recurse during ifunc resolution */ - for (i = 0; str[i] == candidate[i]; i++) - /* suffixes starting with : or + are ignored for future extensions */ - if (str[i] == '\0' || str[i] == ':' || str[i] == '+') { - if (want_force) - *force = 1; - - return (level); - } - } - - return (X86_64_UNDEFINED); -} - -/* - * We can't use getenv(), strcmp(), and a bunch of other functions here as - * they may in turn call SIMD-optimised string functions. - * - * *force is set to 1 if the architecture level is valid and begins with a ! - * and to 0 otherwise. - */ -static int -env_archlevel(int *force) -{ - size_t i; - - if (environ == NULL) - return (X86_64_UNDEFINED); - - for (i = 0; environ[i] != NULL; i++) { - size_t j; - - for (j = 0; environ[i][j] == ARCHLEVEL_ENV "="[j]; j++) - if (environ[i][j] == '=') - return (match_archlevel(&environ[i][j + 1], force)); - } - - *force = 0; - - return (X86_64_UNDEFINED); - -} - -/* - * Determine the architecture level by checking the CPU capabilities - * and the environment: - * - * 1. If environment variable ARCHLEVEL starts with a ! and is followed - * by a valid architecture level, that level is returned. - * 2. Else if ARCHLEVEL is set to a valid architecture level that is - * supported by the CPU, that level is returned. - * 3. Else the highest architecture level supported by the CPU is - * returned. - * - * Valid architecture levels are those defined in the levels array. - * The architecture level "scalar" indicates that SIMD enhancements - * shall not be used. - */ -static int -archlevel(u_int feat_edx, u_int feat_ecx, u_int ext_ebx, u_int ext_ecx) -{ - int islevel, wantlevel, hwlevel, force; - - islevel = atomic_load_int(&amd64_archlevel); - if (islevel != X86_64_UNDEFINED) - return (islevel); - - wantlevel = env_archlevel(&force); - if (!force) { - hwlevel = supported_archlevel(feat_edx, feat_ecx, ext_ebx, ext_ecx); - if (wantlevel == X86_64_UNDEFINED || wantlevel > hwlevel) - wantlevel = hwlevel; - } - - /* - * Ensure amd64_archlevel is set only once and - * all calls agree on what it was set to. - */ - if (atomic_cmpset_int(&amd64_archlevel, islevel, wantlevel)) - return (wantlevel); - else - return (atomic_load_int(&amd64_archlevel)); -} - -/* - * Helper function for SIMD ifunc dispatch: select the highest level - * implementation up to the current architecture level. - */ -dlfunc_t -__archlevel_resolve(u_int feat_edx, u_int feat_ecx, u_int ext_ebx, - u_int ext_ecx, int32_t funcs[static X86_64_MAX + 1]) -{ - int level; - - for (level = archlevel(feat_edx, feat_ecx, ext_ebx, ext_ecx); level >= 0; level--) - if (funcs[level] != 0) - return (dlfunc_t)((uintptr_t)funcs + (ptrdiff_t)funcs[level]); - - /* no function is present -- what now? */ - __builtin_trap(); -} diff --git a/lib/libc/amd64/string/memcmp.S b/lib/libc/amd64/string/memcmp.S index d192229677b3..fea5cebc65f2 100644 --- a/lib/libc/amd64/string/memcmp.S +++ b/lib/libc/amd64/string/memcmp.S @@ -1,405 +1,236 @@ /*- - * Copyright (c) 2018, 2023 The FreeBSD Foundation + * Copyright (c) 2018 The FreeBSD Foundation * * This software was developed by Mateusz Guzik * under sponsorship from the FreeBSD Foundation. * - * Portions of this software were developed by Robert Clausecker - * under sponsorship from the FreeBSD Foundation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include -#include - -#include "amd64_archlevel.h" - /* * Note: this routine was written with kernel use in mind (read: no simd), * it is only present in userspace as a temporary measure until something * better gets imported. */ #define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */ #ifdef BCMP -#define memcmp bcmp +ENTRY(bcmp) +#else +ENTRY(memcmp) #endif - -ARCHFUNCS(memcmp) - ARCHFUNC(memcmp, scalar) - ARCHFUNC(memcmp, baseline) -ENDARCHFUNCS(memcmp) - -ARCHENTRY(memcmp, scalar) xorl %eax,%eax 10: cmpq $16,%rdx ja 101632f cmpb $8,%dl jg 100816f cmpb $4,%dl jg 100408f cmpb $2,%dl jge 100204f cmpb $1,%dl jl 100000f movzbl (%rdi),%eax movzbl (%rsi),%r8d subl %r8d,%eax 100000: ret ALIGN_TEXT 100816: movq (%rdi),%r8 movq (%rsi),%r9 cmpq %r8,%r9 jne 80f movq -8(%rdi,%rdx),%r8 movq -8(%rsi,%rdx),%r9 cmpq %r8,%r9 jne 10081608f ret ALIGN_TEXT 100408: movl (%rdi),%r8d movl (%rsi),%r9d cmpl %r8d,%r9d jne 80f movl -4(%rdi,%rdx),%r8d movl -4(%rsi,%rdx),%r9d cmpl %r8d,%r9d jne 10040804f ret ALIGN_TEXT 100204: movzwl (%rdi),%r8d movzwl (%rsi),%r9d cmpl %r8d,%r9d jne 1f movzwl -2(%rdi,%rdx),%r8d movzwl -2(%rsi,%rdx),%r9d cmpl %r8d,%r9d jne 1f ret ALIGN_TEXT 101632: cmpq $32,%rdx ja 103200f movq (%rdi),%r8 movq (%rsi),%r9 cmpq %r8,%r9 jne 80f movq 8(%rdi),%r8 movq 8(%rsi),%r9 cmpq %r8,%r9 jne 10163208f movq -16(%rdi,%rdx),%r8 movq -16(%rsi,%rdx),%r9 cmpq %r8,%r9 jne 10163216f movq -8(%rdi,%rdx),%r8 movq -8(%rsi,%rdx),%r9 cmpq %r8,%r9 jne 10163224f ret ALIGN_TEXT 103200: movq (%rdi),%r8 movq 8(%rdi),%r9 subq (%rsi),%r8 subq 8(%rsi),%r9 orq %r8,%r9 jnz 10320000f movq 16(%rdi),%r8 movq 24(%rdi),%r9 subq 16(%rsi),%r8 subq 24(%rsi),%r9 orq %r8,%r9 jnz 10320016f leaq 32(%rdi),%rdi leaq 32(%rsi),%rsi subq $32,%rdx cmpq $32,%rdx jae 103200b cmpb $0,%dl jne 10b ret /* * Mismatch was found. */ #ifdef BCMP ALIGN_TEXT 10320016: 10320000: 10081608: 10163224: 10163216: 10163208: 10040804: 80: 1: leal 1(%eax),%eax ret +END(bcmp) #else /* * We need to compute the difference between strings. * Start with narrowing the range down (16 -> 8 -> 4 bytes). */ ALIGN_TEXT 10320016: leaq 16(%rdi),%rdi leaq 16(%rsi),%rsi 10320000: movq (%rdi),%r8 movq (%rsi),%r9 cmpq %r8,%r9 jne 80f leaq 8(%rdi),%rdi leaq 8(%rsi),%rsi jmp 80f ALIGN_TEXT 10081608: 10163224: leaq -8(%rdi,%rdx),%rdi leaq -8(%rsi,%rdx),%rsi jmp 80f ALIGN_TEXT 10163216: leaq -16(%rdi,%rdx),%rdi leaq -16(%rsi,%rdx),%rsi jmp 80f ALIGN_TEXT 10163208: leaq 8(%rdi),%rdi leaq 8(%rsi),%rsi jmp 80f ALIGN_TEXT 10040804: leaq -4(%rdi,%rdx),%rdi leaq -4(%rsi,%rdx),%rsi jmp 1f ALIGN_TEXT 80: movl (%rdi),%r8d movl (%rsi),%r9d cmpl %r8d,%r9d jne 1f leaq 4(%rdi),%rdi leaq 4(%rsi),%rsi /* * We have up to 4 bytes to inspect. */ 1: movzbl (%rdi),%eax movzbl (%rsi),%r8d cmpb %r8b,%al jne 2f movzbl 1(%rdi),%eax movzbl 1(%rsi),%r8d cmpb %r8b,%al jne 2f movzbl 2(%rdi),%eax movzbl 2(%rsi),%r8d cmpb %r8b,%al jne 2f movzbl 3(%rdi),%eax movzbl 3(%rsi),%r8d 2: subl %r8d,%eax ret +END(memcmp) #endif -ARCHEND(memcmp, scalar) - -ARCHENTRY(memcmp, baseline) - cmp $32, %rdx # enough to permit use of the long kernel? - ja .Llong - - test %rdx, %rdx # zero bytes buffer? - je .L0 - - /* - * Compare strings of 1--32 bytes. We want to do this by - * loading into two xmm registers and then comparing. To avoid - * crossing into unmapped pages, we either load 32 bytes from - * the start of the buffer or 32 bytes before its end, depending - * on whether there is a page boundary between the overread area - * or not. - */ - - /* check for page boundaries overreads */ - lea 31(%rdi), %eax # end of overread - lea 31(%rsi), %r8d - lea -1(%rdi, %rdx, 1), %ecx # last character in buffer - lea -1(%rsi, %rdx, 1), %r9d - xor %ecx, %eax - xor %r9d, %r8d - test $PAGE_SIZE, %eax # are they on different pages? - jz 0f - - /* fix up rdi */ - movdqu -32(%rdi, %rdx, 1), %xmm0 - movdqu -16(%rdi, %rdx, 1), %xmm1 - lea -8(%rsp), %rdi # end of replacement buffer - sub %rdx, %rdi # start of replacement buffer - movdqa %xmm0, -40(%rsp) # copy to replacement buffer - movdqa %xmm1, -24(%rsp) - -0: test $PAGE_SIZE, %r8d - jz 0f - - /* fix up rsi */ - movdqu -32(%rsi, %rdx, 1), %xmm0 - movdqu -16(%rsi, %rdx, 1), %xmm1 - lea -40(%rsp), %rsi # end of replacement buffer - sub %rdx, %rsi # start of replacement buffer - movdqa %xmm0, -72(%rsp) # copy to replacement buffer - movdqa %xmm1, -56(%rsp) - - /* load data and compare properly */ -0: movdqu 16(%rdi), %xmm1 - movdqu 16(%rsi), %xmm3 - movdqu (%rdi), %xmm0 - movdqu (%rsi), %xmm2 - mov %edx, %ecx - mov $-1, %edx - shl %cl, %rdx # ones where the buffer is not - pcmpeqb %xmm3, %xmm1 - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm1, %ecx - pmovmskb %xmm0, %eax - shl $16, %ecx - or %ecx, %eax # ones where the buffers match - or %edx, %eax # including where the buffer is not - not %eax # ones where there is a mismatch -#ifndef BCMP - bsf %eax, %edx # location of the first mismatch - cmovz %eax, %edx # including if there is no mismatch - movzbl (%rdi, %rdx, 1), %eax # mismatching bytes - movzbl (%rsi, %rdx, 1), %edx - sub %edx, %eax -#endif - ret - - /* empty input */ -.L0: xor %eax, %eax - ret - - /* compare 33+ bytes */ - ALIGN_TEXT -.Llong: movdqu (%rdi), %xmm0 # load head - movdqu (%rsi), %xmm2 - mov %rdi, %rcx - sub %rdi, %rsi # express rsi as distance from rdi - and $~0xf, %rdi # align rdi to 16 bytes - movdqu 16(%rsi, %rdi, 1), %xmm1 - pcmpeqb 16(%rdi), %xmm1 # compare second half of this iteration - add %rcx, %rdx # pointer to last byte in buffer - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - xor $0xffff, %eax # any mismatch? - jne .Lmismatch_head - add $64, %rdi # advance to next iteration - jmp 1f # and get going with the loop - - /* process buffer 32 bytes at a time */ - ALIGN_TEXT -0: movdqu -32(%rsi, %rdi, 1), %xmm0 - movdqu -16(%rsi, %rdi, 1), %xmm1 - pcmpeqb -32(%rdi), %xmm0 - pcmpeqb -16(%rdi), %xmm1 - add $32, %rdi # advance to next iteration -1: pand %xmm0, %xmm1 # 0xff where both halves matched - pmovmskb %xmm1, %eax - cmp $0xffff, %eax # all bytes matched? - jne .Lmismatch - cmp %rdx, %rdi # end of buffer reached? - jb 0b - - /* less than 32 bytes left to compare */ - movdqu -16(%rdx), %xmm1 # load 32 byte tail through end pointer - movdqu -16(%rdx, %rsi, 1), %xmm3 - movdqu -32(%rdx), %xmm0 - movdqu -32(%rdx, %rsi, 1), %xmm2 - pcmpeqb %xmm3, %xmm1 - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm1, %ecx - pmovmskb %xmm0, %eax - shl $16, %ecx - or %ecx, %eax # ones where the buffers match - not %eax # ones where there is a mismatch -#ifndef BCMP - bsf %eax, %ecx # location of the first mismatch - cmovz %eax, %ecx # including if there is no mismatch - add %rcx, %rdx # pointer to potential mismatch - movzbl -32(%rdx), %eax # mismatching bytes - movzbl -32(%rdx, %rsi, 1), %edx - sub %edx, %eax -#endif - ret - -#ifdef BCMP -.Lmismatch: - mov $1, %eax -.Lmismatch_head: - ret -#else /* memcmp */ -.Lmismatch_head: - tzcnt %eax, %eax # location of mismatch - add %rax, %rcx # pointer to mismatch - movzbl (%rcx), %eax # mismatching bytes - movzbl (%rcx, %rsi, 1), %ecx - sub %ecx, %eax - ret - -.Lmismatch: - movdqu -48(%rsi, %rdi, 1), %xmm1 - pcmpeqb -48(%rdi), %xmm1 # reconstruct xmm1 before PAND - pmovmskb %xmm0, %eax # mismatches in first 16 bytes - pmovmskb %xmm1, %edx # mismatches in second 16 bytes - shl $16, %edx - or %edx, %eax # mismatches in both - not %eax # matches in both - tzcnt %eax, %eax # location of mismatch - add %rax, %rdi # pointer to mismatch - movzbl -64(%rdi), %eax # mismatching bytes - movzbl -64(%rdi, %rsi, 1), %ecx - sub %ecx, %eax - ret -#endif -ARCHEND(memcmp, baseline) .section .note.GNU-stack,"",%progbits diff --git a/lib/libc/amd64/string/stpcpy.S b/lib/libc/amd64/string/stpcpy.S index 59358e3245a8..73c765556dc1 100644 --- a/lib/libc/amd64/string/stpcpy.S +++ b/lib/libc/amd64/string/stpcpy.S @@ -1,237 +1,114 @@ -/*- - * Copyright (c) 2023, The FreeBSD Foundation - * - * SPDX-License-Expression: BSD-2-Clause - * - * Portions of this software were developed by Robert Clausecker - * under sponsorship from the FreeBSD Foundation. - * - * Adapted from NetBSD's common/lib/libc/arch/x86_64/string/strcpy.S - * written by J.T. Conklin and - * adapted by Guillaume Morin to implement stpcpy - * that was originally dedicated to the public domain +/* + * Adapted by Guillaume Morin from strcpy.S + * written by J.T. Conklin + * Public domain. */ #include - -#include "amd64_archlevel.h" - -#define ALIGN_TEXT .p2align 4, 0x90 - - .weak stpcpy - .set stpcpy, __stpcpy -ARCHFUNCS(__stpcpy) - ARCHFUNC(__stpcpy, scalar) - ARCHFUNC(__stpcpy, baseline) -ENDARCHFUNCS(__stpcpy) - /* * This stpcpy implementation copies a byte at a time until the * source pointer is aligned to a word boundary, it then copies by * words until it finds a word containing a zero byte, and finally * copies by bytes until the end of the string is reached. * * While this may result in unaligned stores if the source and * destination pointers are unaligned with respect to each other, * it is still faster than either byte copies or the overhead of * an implementation suitable for machines with strict alignment * requirements. */ -ARCHENTRY(__stpcpy, scalar) + .globl stpcpy,__stpcpy +ENTRY(stpcpy) +__stpcpy: movabsq $0x0101010101010101,%r8 movabsq $0x8080808080808080,%r9 /* * Align source to a word boundary. * Consider unrolling loop? */ .Lalign: testb $7,%sil je .Lword_aligned movb (%rsi),%dl incq %rsi movb %dl,(%rdi) incq %rdi testb %dl,%dl jne .Lalign movq %rdi,%rax dec %rax ret - ALIGN_TEXT + .p2align 4 .Lloop: movq %rdx,(%rdi) addq $8,%rdi .Lword_aligned: movq (%rsi),%rdx movq %rdx,%rcx addq $8,%rsi subq %r8,%rcx testq %r9,%rcx je .Lloop /* * In rare cases, the above loop may exit prematurely. We must * return to the loop if none of the bytes in the word equal 0. */ movb %dl,(%rdi) testb %dl,%dl /* 1st byte == 0? */ je .Ldone incq %rdi shrq $8,%rdx movb %dl,(%rdi) testb %dl,%dl /* 2nd byte == 0? */ je .Ldone incq %rdi shrq $8,%rdx movb %dl,(%rdi) testb %dl,%dl /* 3rd byte == 0? */ je .Ldone incq %rdi shrq $8,%rdx movb %dl,(%rdi) testb %dl,%dl /* 4th byte == 0? */ je .Ldone incq %rdi shrq $8,%rdx movb %dl,(%rdi) testb %dl,%dl /* 5th byte == 0? */ je .Ldone incq %rdi shrq $8,%rdx movb %dl,(%rdi) testb %dl,%dl /* 6th byte == 0? */ je .Ldone incq %rdi shrq $8,%rdx movb %dl,(%rdi) testb %dl,%dl /* 7th byte == 0? */ je .Ldone incq %rdi shrq $8,%rdx movb %dl,(%rdi) incq %rdi testb %dl,%dl /* 8th byte == 0? */ jne .Lword_aligned decq %rdi .Ldone: movq %rdi,%rax ret -ARCHEND(__stpcpy, scalar) - -ARCHENTRY(__stpcpy, baseline) - mov %esi, %ecx - mov %rdi, %rdx - sub %rsi, %rdi # express destination as distance to surce - and $~0xf, %rsi # align source to 16 byte - movdqa (%rsi), %xmm0 # head of string with junk before - pxor %xmm1, %xmm1 - and $0xf, %ecx # misalignment in bytes - pcmpeqb %xmm1, %xmm0 # NUL byte present? - pmovmskb %xmm0, %eax - shr %cl, %eax # clear out matches in junk bytes - bsf %eax, %eax # find match if any - jnz .Lrunt - - /* first normal iteration: write head back if it succeeds */ - movdqa 16(%rsi), %xmm0 # 16 bytes of current iteration - movdqu (%rsi, %rcx, 1), %xmm2 # first 16 bytes of the string - pcmpeqb %xmm0, %xmm1 # NUL byte present? - pmovmskb %xmm1, %eax - test %eax, %eax # find match if any - jnz .Lshorty - - movdqu %xmm2, (%rdx) # store beginning of string - - /* main loop, unrolled twice */ - ALIGN_TEXT -0: movdqa 32(%rsi), %xmm2 # load current iteraion - movdqu %xmm0, 16(%rsi, %rdi, 1) # write back previous iteraion - pxor %xmm1, %xmm1 - add $32, %rsi - pcmpeqb %xmm2, %xmm1 # NUL byte present? - pmovmskb %xmm1, %eax - test %eax, %eax - jnz 1f - - movdqa 16(%rsi), %xmm0 # load current iteraion - movdqu %xmm2, (%rsi, %rdi, 1) # write back previous iteraion - pxor %xmm1, %xmm1 - pcmpeqb %xmm0, %xmm1 # NUL byte present? - pmovmskb %xmm1, %eax - test %eax, %eax - jz 0b - - /* end of string after main loop has iterated */ - add $16, %rsi # advance rsi to second unrolled half -1: tzcnt %eax, %eax # find location of match - # (behaves as bsf on pre-x86-64-v3 CPUs) - add %rsi, %rax # point to NUL byte - movdqu -15(%rax), %xmm0 # last 16 bytes of string - movdqu %xmm0, -15(%rax, %rdi, 1) # copied to destination - add %rdi, %rax # point to destination's NUL byte - ret - - /* NUL encountered in second iteration */ -.Lshorty: - tzcnt %eax, %eax - add $16, %eax # account for length of first iteration - sub %ecx, %eax # but not the parts before the string - - /* NUL encountered in first iteration */ -.Lrunt: lea 1(%rax), %edi # string length including NUL byte - add %rcx, %rsi # point to beginning of string - add %rdx, %rax # point to NUL byte - - /* transfer 16--32 bytes */ -.L1632: cmp $16, %edi - jb .L0815 - - movdqu -16(%rsi, %rdi, 1), %xmm0 # load last 16 bytes - movdqu %xmm2, (%rdx) # store first 16 bytes - movdqu %xmm0, -15(%rax) # store last 16 bytes - ret - - /* transfer 8--15 bytes */ -.L0815: cmp $8, %edi - jb .L0407 - - mov (%rsi), %rcx # load first 8 bytes - mov -8(%rsi, %rdi, 1), %rdi # load last 8 bytes - mov %rcx, (%rdx) # store to dst - mov %rdi, -7(%rax) # dito - ret - - /* transfer 4--7 bytes */ -.L0407: cmp $4, %edi - jb .L0203 - - mov (%rsi), %ecx - mov -4(%rsi, %rdi, 1), %edi - mov %ecx, (%rdx) - mov %edi, -3(%rax) - ret - - /* transfer 2--3 bytes */ -.L0203: cmp $2, %edi - jb .L0101 - - movzwl (%rsi), %ecx - mov %cx, (%rdx) # store first two bytes - - /* transfer 0 bytes (last byte is always NUL) */ -.L0101: movb $0, (%rax) # store terminating NUL byte - ret -ARCHEND(__stpcpy, baseline) - +END(stpcpy) + .section .note.GNU-stack,"",%progbits diff --git a/lib/libc/amd64/string/strchrnul.S b/lib/libc/amd64/string/strchrnul.S deleted file mode 100644 index 0e70b02311d7..000000000000 --- a/lib/libc/amd64/string/strchrnul.S +++ /dev/null @@ -1,170 +0,0 @@ -/*- - * Copyright (c) 2023 The FreeBSD Foundation - * - * This software was developed by Robert Clausecker - * under sponsorship from the FreeBSD Foundation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE - */ - -#include - -#include "amd64_archlevel.h" - -#define ALIGN_TEXT .p2align 4,0x90 # 16-byte alignment, nop-filled - - .weak strchrnul - .set strchrnul, __strchrnul - -ARCHFUNCS(__strchrnul) - ARCHFUNC(__strchrnul, scalar) - ARCHFUNC(__strchrnul, baseline) -ENDARCHFUNCS(__strchrnul) - -/* - * strchrnul(str, c) - * This is implemented like strlen(str), but we check for the - * presence of both NUL and c in each iteration. - */ -ARCHENTRY(__strchrnul, scalar) - mov %edi, %ecx - and $~7, %rdi # align to 8 byte - movzbl %sil, %esi # clear stray high bits - movabs $0x0101010101010101, %r8 - mov (%rdi), %rax # load first word - imul %r8, %rsi # replicate char 8 times - - /* - * Unaligned input: align to 8 bytes. Then proceed the same - * way as with aligned input, but prevent matches before the - * beginning of the string. This is achieved by oring 0x01 - * into each byte of the buffer before the string - */ - shl $3, %ecx - mov %r8, %r10 - add $8, %rdi - shl %cl, %r10 # 0x01 where the string is - xor %r8, %r10 # 0x01 where it is not - neg %r8 # negate 01..01 so we can use lea - movabs $0x8080808080808080, %r9 - - mov %rsi, %rcx - xor %rax, %rcx # str ^ c - or %r10, %rax # str without NUL bytes before it - or %r10, %rcx # (str ^ c) without matches before it - lea (%rax, %r8, 1), %rdx # str - 0x01..01 - lea (%rcx, %r8, 1), %r11 # (str ^ c) - 0x01..01 - not %rax # ~str - not %rcx # ~(str ^ c) - and %rdx, %rax # (str - 0x01..01) & ~str - and %r11, %rcx # ((str ^ c - 0x01..01) & ~(str ^ c) - or %rcx, %rax # matches for both - and %r9, %rax # not including junk bytes - jnz 1f - - /* main loop unrolled twice */ - ALIGN_TEXT -0: mov (%rdi), %rax # str - mov %rsi, %rcx - xor %rax, %rcx # str ^ c - lea (%rax, %r8, 1), %rdx # str - 0x01..01 - lea (%rcx, %r8, 1), %r11 # (str ^ c) - 0x01..01 - not %rax # ~str - not %rcx # ~(str ^ c) - and %rdx, %rax # (str - 0x01..01) & ~str - and %r11, %rcx # ((str ^ c - 0x01..01) & ~(str ^ c) - or %rcx, %rax # matches for both - and %r9, %rax # not including junk bits - jnz 2f - - mov 8(%rdi), %rax # str - add $16, %rdi - mov %rsi, %rcx - xor %rax, %rcx # str ^ c - lea (%rax, %r8, 1), %rdx # str - 0x01..01 - lea (%rcx, %r8, 1), %r11 # (str ^ c) - 0x01..01 - not %rax # ~str - not %rcx # ~(str ^ c) - and %rdx, %rax # (str - 0x01..01) & ~str - and %r11, %rcx # ((str ^ c - 0x01..01) & ~(str ^ c) - or %rcx, %rax # matches for both - and %r9, %rax # not including junk bits - jz 0b - - /* NUL or c found */ -1: sub $8, %rdi # undo advance past buffer -2: tzcnt %rax, %rax # first NUL or c byte match - shr $3, %eax # scale from bit to byte index - add %rdi, %rax # pointer to found c or NUL - ret -ARCHEND(__strchrnul, scalar) - -ARCHENTRY(__strchrnul, baseline) - mov %edi, %ecx - and $~0xf, %rdi # align to 16 byte - movdqa (%rdi), %xmm1 - movd %esi, %xmm0 - and $0xf, %ecx # distance from (%rdi) to start of string - pxor %xmm2, %xmm2 - mov $-1, %edx - punpcklbw %xmm0, %xmm0 # c -> cc - shl %cl, %edx # bits corresponding to bytes in the string - punpcklwd %xmm0, %xmm0 # cc -> cccc - add $16, %rdi - - /* check for match in head */ - pcmpeqb %xmm1, %xmm2 # NUL bytes present? - pshufd $0, %xmm0, %xmm0 # cccc -> cccccccccccccccc - pcmpeqb %xmm0, %xmm1 # c present? - por %xmm2, %xmm1 # either present? - pmovmskb %xmm1, %eax - and %edx, %eax # match in the string? - jnz 1f - - /* main loop unrolled twice */ - ALIGN_TEXT -0: movdqa (%rdi), %xmm1 - pxor %xmm2, %xmm2 - pcmpeqb %xmm1, %xmm2 # NUL bytes present? - pcmpeqb %xmm0, %xmm1 # c present? - por %xmm2, %xmm1 # either present? - pmovmskb %xmm1, %eax - test %eax, %eax # match in the string? - jnz 2f - - movdqa 16(%rdi), %xmm1 - add $32, %rdi - pxor %xmm2, %xmm2 - pcmpeqb %xmm1, %xmm2 # NUL bytes present? - pcmpeqb %xmm0, %xmm1 # c present? - por %xmm2, %xmm1 # either present? - pmovmskb %xmm1, %eax - test %eax, %eax # match in the string? - jz 0b - -1: sub $16, %rdi # undo advance past buffer -2: tzcnt %eax, %eax # where is the match? - add %rdi, %rax # pointer to found c or NUL - ret -ARCHEND(__strchrnul, baseline) - - .section .note.GNU-stack,"",%progbits diff --git a/lib/libc/amd64/string/strlen.S b/lib/libc/amd64/string/strlen.S index cc248af001ac..88328d40e48b 100644 --- a/lib/libc/amd64/string/strlen.S +++ b/lib/libc/amd64/string/strlen.S @@ -1,131 +1,80 @@ -/*- +/* * Written by Mateusz Guzik - * Copyright (c) 2023 The FreeBSD Foundation - * - * Portions of this software were developed by Robert Clausecker - * under sponsorship from the FreeBSD Foundation. - * * Public domain. */ #include -#include "amd64_archlevel.h" /* * Note: this routine was written with kernel use in mind (read: no simd), * it is only present in userspace as a temporary measure until something * better gets imported. */ #define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */ -ARCHFUNCS(strlen) - ARCHFUNC(strlen, scalar) - ARCHFUNC(strlen, baseline) -ENDARCHFUNCS(strlen) - /* * strlen(string) * %rdi * * Uses the ((x - 0x01....01) & ~x & 0x80....80) trick. * * 0x01....01 is replaced with 0x0 - 0x01....01 so that it can be added * with leaq. * * For a description see either: * - "Hacker's Delight" by Henry S. Warren, Jr. * - "Optimizing subroutines in assembly language: An optimization guide for x86 platforms" * by Agner Fog * * The latter contains a 32-bit variant of the same algorithm coded in assembly for i386. */ -ARCHENTRY(strlen, scalar) +ENTRY(strlen) movabsq $0xfefefefefefefeff,%r8 movabsq $0x8080808080808080,%r9 movq %rdi,%r10 movq %rdi,%rcx testb $7,%dil jz 2f /* * Handle misaligned reads: align to 8 and fill * the spurious bytes. */ andq $~7,%rdi movq (%rdi),%r11 shlq $3,%rcx movq $-1,%rdx shlq %cl,%rdx notq %rdx orq %rdx,%r11 leaq (%r11,%r8),%rcx notq %r11 andq %r11,%rcx andq %r9,%rcx jnz 3f /* * Main loop. */ ALIGN_TEXT 1: leaq 8(%rdi),%rdi 2: movq (%rdi),%r11 leaq (%r11,%r8),%rcx notq %r11 andq %r11,%rcx andq %r9,%rcx jz 1b 3: bsfq %rcx,%rcx shrq $3,%rcx leaq (%rcx,%rdi),%rax subq %r10,%rax ret -ARCHEND(strlen, scalar) - -ARCHENTRY(strlen, baseline) - mov %rdi, %rcx - pxor %xmm1, %xmm1 - and $~0xf, %rdi # align string - pcmpeqb (%rdi), %xmm1 # compare head (with junk before string) - mov %rcx, %rsi # string pointer copy for later - and $0xf, %ecx # amount of bytes rdi is past 16 byte alignment - pmovmskb %xmm1, %eax - add $32, %rdi # advance to next iteration - shr %cl, %eax # clear out matches in junk bytes - test %eax, %eax # any match? (can't use ZF from SHR as CL=0 is possible) - jnz 2f - - ALIGN_TEXT -1: pxor %xmm1, %xmm1 - pcmpeqb -16(%rdi), %xmm1 # find NUL bytes - pmovmskb %xmm1, %eax - test %eax, %eax # were any NUL bytes present? - jnz 3f - - /* the same unrolled once more */ - pxor %xmm1, %xmm1 - pcmpeqb (%rdi), %xmm1 - pmovmskb %xmm1, %eax - add $32, %rdi # advance to next iteration - test %eax, %eax - jz 1b - - /* match found in loop body */ - sub $16, %rdi # undo half the advancement -3: tzcnt %eax, %eax # find the first NUL byte - sub %rsi, %rdi # string length until beginning of (%rdi) - lea -16(%rdi, %rax, 1), %rax # that plus loc. of NUL byte: full string length - ret - - /* match found in head */ -2: tzcnt %eax, %eax # compute string length - ret -ARCHEND(strlen, baseline) +END(strlen) .section .note.GNU-stack,"",%progbits diff --git a/lib/libc/string/string.3 b/lib/libc/string/string.3 index b389c58acc07..a164aae01eec 100644 --- a/lib/libc/string/string.3 +++ b/lib/libc/string/string.3 @@ -1,157 +1,156 @@ .\" Copyright (c) 1990, 1991, 1993 .\" The Regents of the University of California. All rights reserved. .\" .\" This code is derived from software contributed to Berkeley by .\" Chris Torek. .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)string.3 8.2 (Berkeley) 12/11/93 .\" -.Dd September 2, 2023 +.Dd December 11, 1993 .Dt STRING 3 .Os .Sh NAME .Nm stpcpy , .Nm strcat , .Nm strncat , .Nm strchr , .Nm strrchr , .Nm strcmp , .Nm strncmp , .Nm strcasecmp , .Nm strncasecmp , .Nm strcpy , .Nm strncpy , .Nm strerror , .Nm strlen , .Nm strpbrk , .Nm strsep , .Nm strspn , .Nm strcspn , .Nm strstr , .Nm strtok , .Nm index , .Nm rindex .Nd string specific functions .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In string.h .Ft char * .Fn stpcpy "char *dst" "const char *src" .Ft char * .Fn strcat "char *s" "const char * append" .Ft char * .Fn strncat "char *s" "const char *append" "size_t count" .Ft char * .Fn strchr "const char *s" "int c" .Ft char * .Fn strrchr "const char *s" "int c" .Ft int .Fn strcmp "const char *s1" "const char *s2" .Ft int .Fn strncmp "const char *s1" "const char *s2" "size_t count" .Ft int .Fn strcasecmp "const char *s1" "const char *s2" .Ft int .Fn strncasecmp "const char *s1" "const char *s2" "size_t count" .Ft char * .Fn strcpy "char *dst" "const char *src" .Ft char * .Fn strncpy "char *dst" "const char *src" "size_t count" .Ft char * .Fn strerror "int errno" .Ft size_t .Fn strlen "const char *s" .Ft char * .Fn strpbrk "const char *s" "const char *charset" .Ft char * .Fn strsep "char **stringp" "const char *delim" .Ft size_t .Fn strspn "const char *s" "const char *charset" .Ft size_t .Fn strcspn "const char *s" "const char *charset" .Ft char * .Fn strstr "const char *big" "const char *little" .Ft char * .Fn strtok "char *s" "const char *delim" .Ft char * .Fn index "const char *s" "int c" .Ft char * .Fn rindex "const char *s" "int c" .Sh DESCRIPTION The string functions manipulate strings terminated by a null byte. .Pp See the specific manual pages for more information. For manipulating variable length generic objects as byte strings (without the null byte check), see .Xr bstring 3 . .Pp Except as noted in their specific manual pages, the string functions do not test the destination for size limitations. .Sh SEE ALSO .Xr bstring 3 , .Xr index 3 , .Xr rindex 3 , .Xr stpcpy 3 , .Xr strcasecmp 3 , .Xr strcat 3 , .Xr strchr 3 , .Xr strcmp 3 , .Xr strcpy 3 , .Xr strcspn 3 , .Xr strerror 3 , .Xr strlen 3 , .Xr strpbrk 3 , .Xr strrchr 3 , .Xr strsep 3 , .Xr strspn 3 , .Xr strstr 3 , -.Xr strtok 3 , -.Xr simd 7 +.Xr strtok 3 .Sh STANDARDS The .Fn strcat , .Fn strncat , .Fn strchr , .Fn strrchr , .Fn strcmp , .Fn strncmp , .Fn strcpy , .Fn strncpy , .Fn strerror , .Fn strlen , .Fn strpbrk , .Fn strspn , .Fn strcspn , .Fn strstr , and .Fn strtok functions conform to .St -isoC . diff --git a/share/man/man7/Makefile b/share/man/man7/Makefile index 43d37fa33275..60a52ae5327c 100644 --- a/share/man/man7/Makefile +++ b/share/man/man7/Makefile @@ -1,60 +1,59 @@ # @(#)Makefile 8.1 (Berkeley) 6/5/93 .include #MISSING: eqnchar.7 ms.7 term.7 MAN= arch.7 \ ascii.7 \ bsd.snmpmod.mk.7 \ build.7 \ c.7 \ clocks.7 \ crypto.7 \ development.7 \ environ.7 \ ffs.7 \ firewall.7 \ growfs.7 \ hier.7 \ hostname.7 \ intro.7 \ maclabel.7 \ operator.7 \ orders.7 \ ports.7 \ release.7 \ sdoc.7 \ security.7 \ - simd.7 \ sizeof.7 \ sprog.7 \ stats.7 \ stdint.7 \ sticky.7 \ tests.7 \ tuning.7 MLINKS= intro.7 miscellaneous.7 MLINKS+= growfs.7 growfs_fstab.7 MLINKS+= security.7 securelevel.7 MLINKS+= c.7 c78.7 MLINKS+= c.7 c89.7 MLINKS+= c.7 c90.7 MLINKS+= c.7 c95.7 MLINKS+= c.7 c99.7 MLINKS+= c.7 c11.7 MLINKS+= c.7 c17.7 MLINKS+= c.7 c2x.7 .if ${MK_TESTS} != "no" ATF= ${SRCTOP}/contrib/atf .PATH: ${ATF}/doc MAN+= atf.7 CLEANFILES+= atf.7 atf.7: atf.7.in sed -e 's,__DOCDIR__,/usr/share/doc/atf,g' \ <"${ATF}/doc/atf.7.in" >atf.7 .endif .include diff --git a/share/man/man7/arch.7 b/share/man/man7/arch.7 index 14c6af612087..8e89203e4003 100644 --- a/share/man/man7/arch.7 +++ b/share/man/man7/arch.7 @@ -1,440 +1,439 @@ .\" Copyright (c) 2016-2017 The FreeBSD Foundation. .\" .\" This documentation was created by Ed Maste under sponsorship of .\" The FreeBSD Foundation. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd September 2, 2023 +.Dd April 12, 2023 .Dt ARCH 7 .Os .Sh NAME .Nm arch .Nd Architecture-specific details .Sh DESCRIPTION Differences between CPU architectures and platforms supported by .Fx . .Ss Introduction This document is a quick reference of key ABI details of .Fx architecture ports. For full details consult the processor-specific ABI supplement documentation. .Pp If not explicitly mentioned, sizes are in bytes. The architecture details in this document apply to .Fx 12.0 and later, unless otherwise noted. .Pp .Fx uses a flat address space. Variables of types .Vt unsigned long , .Vt uintptr_t , and .Vt size_t and pointers all have the same representation. .Pp In order to maximize compatibility with future pointer integrity mechanisms, manipulations of pointers as integers should be performed via .Vt uintptr_t or .Vt intptr_t and no other types. In particular, .Vt long and .Vt ptrdiff_t should be avoided. .Pp On some architectures, e.g., .Dv powerpc and AIM variants of .Dv powerpc64 , the kernel uses a separate address space. On other architectures, kernel and a user mode process share a single address space. The kernel is located at the highest addresses. .Pp On each architecture, the main user mode thread's stack starts near the highest user address and grows down. .Pp .Fx architecture support varies by release. This table shows currently supported CPU architectures along with the first .Fx release to support each architecture. .Bl -column -offset indent "Architecture" "Initial Release" .It Sy Architecture Ta Sy Initial Release .It aarch64 Ta 11.0 .It amd64 Ta 5.1 .It armv6 Ta 10.0 .It armv7 Ta 12.0 .It i386 Ta 1.0 .It powerpc Ta 6.0 .It powerpcspe Ta 12.0 .It powerpc64 Ta 9.0 .It powerpc64le Ta 13.0 .It riscv64 Ta 12.0 .El .Pp Discontinued architectures are shown in the following table. .Bl -column -offset indent "Architecture" "Initial Release" "Final Release" .It Sy Architecture Ta Sy Initial Release Ta Sy Final Release .It alpha Ta 3.2 Ta 6.4 .It arm Ta 6.0 Ta 12.x .It armeb Ta 8.0 Ta 11.4 .It ia64 Ta 5.0 Ta 10.4 .It mips Ta 8.0 Ta 13.x .It mipsel Ta 9.0 Ta 13.x .It mipselhf Ta 12.0 Ta 13.x .It mipshf Ta 12.0 Ta 13.x .It mipsn32 Ta 9.0 Ta 13.x .It mips64 Ta 9.0 Ta 13.x .It mips64el Ta 9.0 Ta 13.x .It mips64elhf Ta 12.0 Ta 13.x .It mips64hf Ta 12.0 Ta 13.x .It pc98 Ta 2.2 Ta 11.4 .It riscv64sf Ta 12.0 Ta 13.x .It sparc64 Ta 5.0 Ta 12.x .El .Ss Type sizes All .Fx architectures use some variant of the ELF (see .Xr elf 5 ) .Sy Application Binary Interface (ABI) for the machine processor. All supported ABIs can be divided into two groups: .Bl -tag -width "Dv ILP32" .It Dv ILP32 .Vt int , .Vt long , .Vt void * types machine representations all have 4-byte size. .It Dv LP64 .Vt int type machine representation uses 4 bytes, while .Vt long and .Vt void * are 8 bytes. .El .Pp Some machines support more than one .Fx ABI. Typically these are 64-bit machines, where the .Dq native .Dv LP64 execution environment is accompanied by the .Dq legacy .Dv ILP32 environment, which was the historical 32-bit predecessor for 64-bit evolution. Examples are: .Bl -column -offset indent "powerpc64" "ILP32 counterpart" .It Sy LP64 Ta Sy ILP32 counterpart .It Dv amd64 Ta Dv i386 .It Dv powerpc64 Ta Dv powerpc .It Dv aarch64 Ta Dv armv6/armv7 .El .Pp .Dv aarch64 will support execution of .Dv armv6 or .Dv armv7 binaries if the CPU implements .Dv AArch32 execution state, however older .Dv armv4 and .Dv armv5 binaries aren't supported. .Pp On all supported architectures: .Bl -column -offset -indent "long long" "Size" .It Sy Type Ta Sy Size .It short Ta 2 .It int Ta 4 .It long Ta sizeof(void*) .It long long Ta 8 .It float Ta 4 .It double Ta 8 .El .Pp Integers are represented in two's complement. Alignment of integer and pointer types is natural, that is, the address of the variable must be congruent to zero modulo the type size. Most ILP32 ABIs, except .Dv arm , require only 4-byte alignment for 64-bit integers. .Pp Machine-dependent type sizes: .Bl -column -offset indent "Architecture" "void *" "long double" "time_t" .It Sy Architecture Ta Sy void * Ta Sy long double Ta Sy time_t .It aarch64 Ta 8 Ta 16 Ta 8 .It amd64 Ta 8 Ta 16 Ta 8 .It armv6 Ta 4 Ta 8 Ta 8 .It armv7 Ta 4 Ta 8 Ta 8 .It i386 Ta 4 Ta 12 Ta 4 .It powerpc Ta 4 Ta 8 Ta 8 .It powerpcspe Ta 4 Ta 8 Ta 8 .It powerpc64 Ta 8 Ta 8 Ta 8 .It powerpc64le Ta 8 Ta 8 Ta 8 .It riscv64 Ta 8 Ta 16 Ta 8 .El .Pp .Sy time_t is 8 bytes on all supported architectures except i386. .Ss Endianness and Char Signedness .Bl -column -offset indent "Architecture" "Endianness" "char Signedness" .It Sy Architecture Ta Sy Endianness Ta Sy char Signedness .It aarch64 Ta little Ta unsigned .It amd64 Ta little Ta signed .It armv6 Ta little Ta unsigned .It armv7 Ta little Ta unsigned .It i386 Ta little Ta signed .It powerpc Ta big Ta unsigned .It powerpcspe Ta big Ta unsigned .It powerpc64 Ta big Ta unsigned .It powerpc64le Ta little Ta unsigned .It riscv64 Ta little Ta signed .El .Ss Page Size .Bl -column -offset indent "Architecture" "Page Sizes" .It Sy Architecture Ta Sy Page Sizes .It aarch64 Ta 4K, 2M, 1G .It amd64 Ta 4K, 2M, 1G .It armv6 Ta 4K, 1M .It armv7 Ta 4K, 1M .It i386 Ta 4K, 2M (PAE), 4M .It powerpc Ta 4K .It powerpcspe Ta 4K .It powerpc64 Ta 4K .It powerpc64le Ta 4K .It riscv64 Ta 4K, 2M, 1G .El .Ss Floating Point .Bl -column -offset indent "Architecture" "float, double" "long double" .It Sy Architecture Ta Sy float, double Ta Sy long double .It aarch64 Ta hard Ta soft, quad precision .It amd64 Ta hard Ta hard, 80 bit .It armv6 Ta hard Ta hard, double precision .It armv7 Ta hard Ta hard, double precision .It i386 Ta hard Ta hard, 80 bit .It powerpc Ta hard Ta hard, double precision .It powerpcspe Ta hard Ta hard, double precision .It powerpc64 Ta hard Ta hard, double precision .It powerpc64le Ta hard Ta hard, double precision .It riscv64 Ta hard Ta hard, quad precision .El .Ss Default Tool Chain .Fx uses .Xr clang 1 as the default compiler on all supported CPU architectures, LLVM's .Xr ld.lld 1 as the default linker, and ELF Tool Chain binary utilities such as .Xr objcopy 1 and .Xr readelf 1 . .Ss MACHINE_ARCH vs MACHINE_CPUARCH vs MACHINE .Dv MACHINE_CPUARCH should be preferred in Makefiles when the generic architecture is being tested. .Dv MACHINE_ARCH should be preferred when there is something specific to a particular type of architecture where there is a choice of many, or could be a choice of many. Use .Dv MACHINE when referring to the kernel, interfaces dependent on a specific type of kernel or similar things like boot sequences. .Bl -column -offset indent "Dv MACHINE" "Dv MACHINE_CPUARCH" "Dv MACHINE_ARCH" .It Dv MACHINE Ta Dv MACHINE_CPUARCH Ta Dv MACHINE_ARCH .It arm64 Ta aarch64 Ta aarch64 .It amd64 Ta amd64 Ta amd64 .It arm Ta arm Ta armv6, armv7 .It i386 Ta i386 Ta i386 .It powerpc Ta powerpc Ta powerpc, powerpcspe, powerpc64, powerpc64le .It riscv Ta riscv Ta riscv64 .El .Ss Predefined Macros The compiler provides a number of predefined macros. Some of these provide architecture-specific details and are explained below. Other macros, including those required by the language standard, are not included here. .Pp The full set of predefined macros can be obtained with this command: .Bd -literal -offset indent cc -x c -dM -E /dev/null .Ed .Pp Common type size and endianness macros: .Bl -column -offset indent "BYTE_ORDER" "Meaning" .It Sy Macro Ta Sy Meaning .It Dv __LP64__ Ta 64-bit (8-byte) long and pointer, 32-bit (4-byte) int .It Dv __ILP32__ Ta 32-bit (4-byte) int, long and pointer .It Dv BYTE_ORDER Ta Either Dv BIG_ENDIAN or Dv LITTLE_ENDIAN . .Dv PDP11_ENDIAN is not used on .Fx . .El .Pp Architecture-specific macros: .Bl -column -offset indent "Architecture" "Predefined macros" .It Sy Architecture Ta Sy Predefined macros .It aarch64 Ta Dv __aarch64__ .It amd64 Ta Dv __amd64__ , Dv __x86_64__ .It armv6 Ta Dv __arm__ , Dv __ARM_ARCH >= 6 .It armv7 Ta Dv __arm__ , Dv __ARM_ARCH >= 7 .It i386 Ta Dv __i386__ .It powerpc Ta Dv __powerpc__ .It powerpcspe Ta Dv __powerpc__ , Dv __SPE__ .It powerpc64 Ta Dv __powerpc__ , Dv __powerpc64__ .It powerpc64le Ta Dv __powerpc__ , Dv __powerpc64__ .It riscv64 Ta Dv __riscv , Dv __riscv_xlen == 64 .El .Pp Compilers may define additional variants of architecture-specific macros. The macros above are preferred for use in .Fx . .Ss Important Xr make 1 variables Most of the externally settable variables are defined in the .Xr build 7 man page. These variables are not otherwise documented and are used extensively in the build system. .Bl -tag -width "MACHINE_CPUARCH" .It Dv MACHINE Represents the hardware platform. This is the same as the native platform's .Xr uname 1 .Fl m output. It defines both the userland / kernel interface, as well as the bootloader / kernel interface. It should only be used in these contexts. Each CPU architecture may have multiple hardware platforms it supports where .Dv MACHINE differs among them. It is used to collect together all the files from .Xr config 8 to build the kernel. It is often the same as .Dv MACHINE_ARCH just as one CPU architecture can be implemented by many different hardware platforms, one hardware platform may support multiple CPU architecture family members, though with different binaries. For example, .Dv MACHINE of i386 supported the IBM-AT hardware platform while the .Dv MACHINE of pc98 supported the Japanese company NEC's PC-9801 and PC-9821 hardware platforms. Both of these hardware platforms supported only the .Dv MACHINE_ARCH of i386 where they shared a common ABI, except for certain kernel / userland interfaces relating to underlying hardware platform differences in bus architecture, device enumeration and boot interface. Generally, .Dv MACHINE should only be used in src/sys and src/stand or in system imagers or installers. .It Dv MACHINE_ARCH Represents the CPU processor architecture. This is the same as the native platforms .Xr uname 1 .Fl p output. It defines the CPU instruction family supported. It may also encode a variation in the byte ordering of multi-byte integers (endian). It may also encode a variation in the size of the integer or pointer. It may also encode a ISA revision. It may also encode hard versus soft floating point ABI and usage. It may also encode a variant ABI when the other factors do not uniquely define the ABI. It, along with .Dv MACHINE , defines the ABI used by the system. Generally, the plain CPU name specifies the most common (or at least first) variant of the CPU. This is why powerpc and powerpc64 imply 'big endian' while 'armv6' and 'armv7' imply little endian. If we ever were to support the so-called x32 ABI (using 32-bit pointers on the amd64 architecture), it would most likely be encoded as amd64-x32. It is unfortunate that amd64 specifies the 64-bit evolution of the x86 platform (it matches the 'first rule') as everybody else uses x86_64. There is no standard name for the processor: each OS selects its own conventions. .It Dv MACHINE_CPUARCH Represents the source location for a given .Dv MACHINE_ARCH . It is generally the common prefix for all the MACHINE_ARCH that share the same implementation, though 'riscv' breaks this rule. While amd64 and i386 are closely related, MACHINE_CPUARCH is not x86 for them. The .Fx source base supports amd64 and i386 with two distinct source bases living in subdirectories named amd64 and i386 (though behind the scenes there's some sharing that fits into this framework). .It Dv CPUTYPE Sets the flavor of .Dv MACHINE_ARCH to build. It is used to optimize the build for a specific CPU / core that the binaries run on. Generally, this does not change the ABI, though it can be a fine line between optimization for specific cases. .It Dv TARGET Used to set .Dv MACHINE in the top level Makefile for cross building. Unused outside of that scope. It is not passed down to the rest of the build. Makefiles outside of the top level should not use it at all (though some have their own private copy for hysterical raisons). .It Dv TARGET_ARCH Used to set .Dv MACHINE_ARCH by the top level Makefile for cross building. Like .Dv TARGET , it is unused outside of that scope. .El .Sh SEE ALSO .Xr src.conf 5 , -.Xr build 7 , -.Xr simd 7 +.Xr build 7 .Sh HISTORY An .Nm manual page appeared in .Fx 11.1 . diff --git a/share/man/man7/environ.7 b/share/man/man7/environ.7 index 8a927c0f9f3d..7e3437e7c6d4 100644 --- a/share/man/man7/environ.7 +++ b/share/man/man7/environ.7 @@ -1,321 +1,313 @@ .\" Copyright (c) 1983, 1990, 1993 .\" The Regents of the University of California. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)environ.7 8.3 (Berkeley) 4/19/94 .\" -.Dd September 3, 2023 +.Dd August 5, 2020 .Dt ENVIRON 7 .Os .Sh NAME .Nm environ .Nd user environment .Sh SYNOPSIS .Ar extern char **environ ; .Sh DESCRIPTION An array of strings, called the .Ar environment is made available to each process by .Xr execve 2 when a process begins. By convention these strings have the form .Va name Ns No = Ns Ar value , and are referred to as .Dq environment variables . A process can query, update, and delete these strings using the .Xr getenv 3 , .Xr setenv 3 , and .Xr unsetenv 3 functions, respectively. The shells also provide commands to manipulate the environment; they are described in the respective shell manual pages. .Pp What follows is a list of environment variables typically seen on a .Ux system. It includes only those variables that a user can expect to see during their day-to-day use of the system, and is far from complete. Environment variables specific to a particular program or library function are documented in the .Sx ENVIRONMENT section of the appropriate manual page. .Sh ENVIRONMENT .Bl -tag -width LD_LIBRARY_PATH -.It Ev ARCHLEVEL -On -.Em amd64 , -controls the level of SIMD enhancements used. -See -.Xr simd 7 -for details. .It Ev BLOCKSIZE The size of the block units used by several disk-related commands, most notably .Xr df 1 , .Xr du 1 and .Xr ls 1 . .Ev BLOCKSIZE may be specified in units of a byte by specifying a number, in units of a kilobyte by specifying a number followed by .Ql K or .Ql k , in units of a megabyte by specifying a number followed by .Ql M or .Ql m , and in units of a gigabyte by specifying a number followed by .Ql G or .Ql g . Sizes less than 512 bytes or greater than a gigabyte are ignored. This variable is processed by the .Xr getbsize 3 function. .It Ev COLUMNS The user's preferred width in column positions for the terminal. Utilities such as .Xr ls 1 and .Xr who 1 use this to format output into columns. If unset or empty, utilities will use an .Xr ioctl 2 call to ask the terminal driver for the width. .It Ev EDITOR Default editor name. .It Ev EXINIT A startup list of commands read by .Xr ex 1 and .Xr vi 1 . .It Ev HOME A user's login directory, set by .Xr login 1 from the password file .Xr passwd 5 . .It Ev LANG This variable configures all programs which use .Xr setlocale 3 to use the specified locale unless the .Ev LC_* variables are set. .It Ev LC_ALL Overrides the values of .Ev LC_COLLATE , .Ev LC_CTYPE , .Ev LC_MESSAGES , .Ev LC_MONETARY , .Ev LC_NUMERIC , .Ev LC_TIME and .Ev LANG . .It Ev LC_COLLATE Locale to be used for ordering of strings. .It Ev LC_CTYPE Locale to be used for character classification (letter, space, digit, etc.) and for interpreting byte sequences as multibyte characters. .It Ev LC_MESSAGES Locale to be used for diagnostic messages. .It Ev LC_MONETARY Locale to be used for interpreting monetary input and formatting output. .It Ev LC_NUMERIC Locale to be used for interpreting numeric input and formatting output. .It Ev LC_TIME Locale to be used for interpreting dates input and for formatting output. .It Ev MAIL The location of the user's mailbox instead of the default in /var/mail, used by .Xr mail 1 , .Xr sh 1 , and many other mail clients. .It Ev MANPATH The sequence of directories, separated by colons, searched by .Xr man 1 when looking for manual pages. .It Ev NLSPATH List of directories to be searched for the message catalog referred to by .Ev LC_MESSAGES . See .Xr catopen 3 . .It Ev PAGER Default paginator program. The program specified by this variable is used by .Xr mail 1 , .Xr man 1 , .Xr ftp 1 , etc, to display information which is longer than the current display. .It Ev PATH The sequence of directories, separated by colons, searched by .Xr csh 1 , .Xr sh 1 , .Xr system 3 , .Xr execvp 3 , etc, when looking for an executable file. .Ev PATH is set to ``/usr/bin:/bin'' initially by .Xr login 1 . .It Ev POSIXLY_CORRECT When set to any value, this environment variable modifies the behaviour of certain commands to (mostly) execute in a strictly POSIX-compliant manner. .It Ev PRINTER The name of the default printer to be used by .Xr lpr 1 , .Xr lpq 1 , and .Xr lprm 1 . .It Ev PWD The current directory pathname. .It Ev SHELL The full pathname of the user's login shell. .It Ev TERM The kind of terminal for which output is to be prepared. This information is used by commands, such as .Xr nroff 1 Pq Pa ports/textproc/groff or .Xr plot 1 which may exploit special terminal capabilities. See .Pa /usr/share/misc/termcap .Pq Xr termcap 5 for a list of terminal types. .It Ev TERMCAP The string describing the terminal in .Ev TERM , or, if it begins with a '/', the name of the termcap file. See .Ev TERMPATH below, and .Xr termcap 5 . .It Ev TERMPATH A sequence of pathnames of termcap files, separated by colons or spaces, which are searched for terminal descriptions in the order listed. Having no .Ev TERMPATH is equivalent to a .Ev TERMPATH of .Pa $HOME/.termcap:/etc/termcap . .Ev TERMPATH is ignored if .Ev TERMCAP contains a full pathname. .It Ev TMPDIR The directory in which to store temporary files. Most applications use either .Pa /tmp or .Pa /var/tmp . Setting this variable will make them use another directory. .It Ev TZ The timezone to use when displaying dates. The normal format is a pathname relative to .Pa /usr/share/zoneinfo . For example, the command .Pp .Dl env TZ=America/Los_Angeles date .Pp displays the current time in California. See .Xr tzset 3 for more information. .It Ev USER The login name of the user. It is recommended that portable applications use .Ev LOGNAME instead. .El .Pp Further names may be placed in the environment by the .Ic export command and .Ar name=value arguments in .Xr sh 1 , or by the .Ic setenv command if you use .Xr csh 1 . It is unwise to change certain .Xr sh 1 variables that are frequently exported by .Pa .profile files, such as .Ev MAIL , .Ev PS1 , .Ev PS2 , and .Ev IFS , unless you know what you are doing. .Pp The current environment variables can be printed with .Xr env 1 , .Xr set 1 or .Xr printenv 1 in .Xr sh 1 and .Xr env 1 , .Xr printenv 1 or the .Cm printenv built-in command in .Xr csh 1 . .Sh SEE ALSO .Xr cd 1 , .Xr csh 1 , .Xr env 1 , .Xr ex 1 , .Xr login 1 , .Xr printenv 1 , .Xr sh 1 , .Xr execve 2 , .Xr execle 3 , .Xr getbsize 3 , .Xr getenv 3 , .Xr setenv 3 , .Xr setlocale 3 , .Xr system 3 , .Xr termcap 3 , -.Xr termcap 5 , -.Xr simd 7 +.Xr termcap 5 .Sh HISTORY The .Nm manual page appeared in .At v7 . diff --git a/share/man/man7/simd.7 b/share/man/man7/simd.7 deleted file mode 100644 index bf33d4eb5531..000000000000 --- a/share/man/man7/simd.7 +++ /dev/null @@ -1,227 +0,0 @@ -.\" Copyright (c) 2023 The FreeBSD Foundation -. -.\" This documentation was written by Robert Clausecker -.\" under sponsorship from the FreeBSD Foundation. -. -.\" Redistribution and use in source and binary forms, with or without -.\" modification, are permitted provided that the following conditions -.\" are met: -.\" 1. Redistributions of source code must retain the above copyright -.\" notice, this list of conditions and the following disclaimer. -.\" 2. Redistributions in binary form must reproduce the above copyright -.\" notice, this list of conditions and the following disclaimer in the -.\" documentation and/or other materials provided with the distribution. -. -.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND -.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE -.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -.\" SUCH DAMAGE -. -.Dd August 13, 2023 -.Dt SIMD 7 -.Os -.Sh NAME -.Nm simd -.Nd SIMD enhancements -. -.Sh DESCRIPTION -On some architectures, the -.Fx -.Em libc -provides enhanced implementations of commonly used functions, replacing -the architecture-independent implementations used otherwise. -Depending on architecture and function, an enhanced -implementation of a function may either always be used or the -.Em libc -detects at runtime which SIMD instruction set extensions are -supported and picks the most suitable implementation automatically. -On -.Cm amd64 , -the environment variable -.Ev ARCHLEVEL -can be used to override this mechanism. -.Pp -Enhanced functions are present in the following architectures: -.Bl -column FUNCTION__ aarch64_ arm_ amd64_ i386_ ppc64_ -offset indent -.It Em FUNCTION Ta Em AARCH64 Ta Em ARM Ta Em AMD64 Ta Em I386 Ta Em PPC64 -.It bcmp Ta Ta Ta S1 Ta S -.It bcopy Ta Ta S Ta S Ta S Ta SV -.It bzero Ta Ta S Ta S Ta S -.It div Ta Ta Ta S Ta S -.It index Ta S Ta Ta S1 -.It ldiv Ta Ta Ta S Ta S -.It lldiv Ta Ta Ta S -.It memcmp Ta Ta S Ta S1 Ta S -.It memcpy Ta S Ta S Ta S Ta S Ta SV -.It memmove Ta S Ta S Ta S Ta S Ta SV -.It memset Ta Ta S Ta S Ta S -.It rindex Ta S -.It stpcpy Ta Ta Ta S1 -.It strcat Ta Ta Ta S Ta S -.It strchr Ta S Ta Ta S1 Ta S -.It strchrnul Ta Ta Ta S1 -.It strcmp Ta Ta S Ta S Ta S -.It strcpy Ta Ta Ta S1 Ta S Ta S2 -.It strlen Ta Ta S Ta S1 -.It strncmp Ta Ta S Ta Ta S -.It strncpy Ta Ta Ta Ta Ta S2 -.It strrchr Ta S Ta Ta Ta S -.It swab Ta Ta Ta Ta S -.It wcschr Ta Ta Ta Ta S -.It wcscmp Ta Ta Ta Ta S -.It wcslen Ta Ta Ta Ta S -.It wmemchr Ta Ta Ta Ta S -.El -.Pp -.Sy S Ns :\ scalar (non-SIMD), -.Sy 1 Ns :\ amd64 baseline, -.Sy 2 Ns :\ x86-64-v2 -or PowerPC\ 2.05, -.Sy 3 Ns :\ x86-64-v3, -.Sy 4 Ns :\ x86-64-v4, -.Sy V Ns :\ PowerPC\ VSX. -. -.Sh ENVIRONMENT -.Bl -tag -.It Ev ARCHLEVEL -On -.Em amd64 , -controls the level of SIMD enhancements used. -If this variable is set to an architecture level from the list below -and that architecture level is supported by the processor, SIMD -enhancements up to -.Ev ARCHLEVEL -are used. -If -.Ev ARCHLEVEL -is unset, not recognised, or not supported by the processor, the highest -level of SIMD enhancements supported by the processor is used. -.Pp -A suffix beginning with -.Sq ":" -or -.Sq "+" -in -.Ev ARCHLEVEL -is ignored and may be used for future extensions. -The architecture level can be prefixed with a -.Sq "!" -character to force use of the requested architecture level, even if the -processor does not advertise that it is supported. -This usually causes applications to crash and should only be used for -testing purposes or if architecture level detection yields incorrect -results. -.Pp -The architecture levels follow the AMD64 SysV ABI supplement: -.Bl -tag -width x86-64-v2 -.It Cm scalar -scalar enhancements only (no SIMD) -.It Cm baseline -cmov, cx8, x87 FPU, fxsr, MMX, osfxsr, SSE, SSE2 -.It Cm x86-64-v2 -cx16, lahf/sahf, popcnt, SSE3, SSSE3, SSE4.1, SSE4.2 -.It Cm x86-64-v3 -AVX, AVX2, BMI1, BMI2, F16C, FMA, lzcnt, movbe, osxsave -.It Cm x86-64-v4 -AVX-512F/BW/CD/DQ/VL -.El -.El -. -.Sh DIAGNOSTICS -.Bl -diag -.It "Illegal Instruction" -Printed by -.Xr sh 1 -if a command is terminated through delivery of a -.Dv SIGILL -signal, see -.Xr signal 3 . -.Pp -Use of an unsupported architecture level was forced by setting -.Ev ARCHLEVEL -to a string beginning with a -.Sq "!" -character, causing a process to crash due to use of an unsupported -instruction. -Unset -.Ev ARCHLEVEL , -remove the -.Sq "!" -prefix or select a supported architecture level. -.Pp -Message may also appear for unrelated reasons. -.El -. -.Sh SEE ALSO -.Xr string 3 , -.Xr arch 7 -.Rs -.%A H. J. Lu -.%A Michael Matz -.%A Milind Girkar -.%A Jan Hubi\[u010D]ka \" \(vc -.%A Andreas Jaeger -.%A Mark Mitchell -.%B System V Application Binary Interface -.%D May 23, 2023 -.%T AMD64 Architecture Processor Supplement -.%O Version 1.0 -.Re -. -.Sh HISTORY -Architecture-specific enhanced -.Em libc -functions were added starting -with -.Fx 2.0 -for -.Cm i386 , -.Fx 6.0 -for -.Cm arm , -.Fx 6.1 -for -.Cm amd64 , -.Fx 11.0 -for -.Cm aarch64 , -and -.Fx 12.0 -for -.Cm powerpc64 . -SIMD-enhanced functions were first added with -.Fx 13.0 -for -.Cm powerpc64 -and with -.Fx 14.0 -for -.Cm amd64 . -.Pp -A -.Nm -manual page appeared in -.Fx 14.0 . -. -.Sh AUTHOR -.An Robert Clausecker Aq Mt fuz@FreeBSD.org -. -.Sh CAVEATS -Other parts of -.Fx -such as cryptographic routines in the kernel or in -OpenSSL may also use SIMD enhancements. -These enhancements are not subject to the -.Ev ARCHLEVEL -variable and may have their own configuration -mechanism. -. -.Sh BUGS -Use of SIMD enhancements cannot be configured on powerpc64.