diff --git a/lib/libc/string/bcopy.c b/lib/libc/string/bcopy.c --- a/lib/libc/string/bcopy.c +++ b/lib/libc/string/bcopy.c @@ -40,7 +40,7 @@ #include -typedef intptr_t word; /* "word" used for optimal copy speed */ +typedef size_t word; /* "word" used for optimal copy speed */ #define wsize sizeof(word) #define wmask (wsize - 1) @@ -56,10 +56,11 @@ void * #ifdef MEMCOPY memcpy +(void * __restrict dst0, const void * __restrict src0, size_t length) #else memmove -#endif (void *dst0, const void *src0, size_t length) +#endif #else #include @@ -67,33 +68,37 @@ bcopy(const void *src0, void *dst0, size_t length) #endif { - char *dst = dst0; - const char *src = src0; - size_t t; - - if (length == 0 || dst == src) /* nothing to do */ + if (__predict_false(length == 0)) goto done; + u_char *dst = (u_char *)dst0; + const u_char *src = (const u_char *)src0; + size_t t; /* * Macros: loop-t-times; and loop-t-times, t>0 */ -#define TLOOP(s) if (t) TLOOP1(s) +#define TLOOP(s) for (; t; --t) { s; } #define TLOOP1(s) do { s; } while (--t) - if ((unsigned long)dst < (unsigned long)src) { +#ifndef MEMCOPY + if (dst < src) { +#endif /* * Copy forward. */ - t = (uintptr_t)src; /* only need low bits */ - if ((t | (uintptr_t)dst) & wmask) { + t = (size_t)src; /* only need low bits */ + if ((t | (size_t)dst) & wmask) { /* * Try to align operands. This cannot be done * unless the low bits match. */ - if ((t ^ (uintptr_t)dst) & wmask || length < wsize) + if ((t ^ (size_t)dst) & wmask || length < wsize) { t = length; - else - t = wsize - (t & wmask); + TLOOP1(*dst++ = *src++); + goto done; + } + + t = wsize - (t & wmask); length -= t; TLOOP1(*dst++ = *src++); } @@ -101,33 +106,39 @@ * Copy whole words, then mop up any trailing bytes. */ t = length / wsize; - TLOOP(*(word *)(void *)dst = *(const word *)(const void *)src; - src += wsize; dst += wsize); + TLOOP(*(word *)dst = *(const word *)src; dst += wsize; + src += wsize); t = length & wmask; TLOOP(*dst++ = *src++); +#ifndef MEMCOPY } else { /* * Copy backwards. Otherwise essentially the same. * Alignment works as before, except that it takes * (t&wmask) bytes to align, not wsize-(t&wmask). */ - src += length; dst += length; - t = (uintptr_t)src; - if ((t | (uintptr_t)dst) & wmask) { - if ((t ^ (uintptr_t)dst) & wmask || length <= wsize) + src += length; + t = (size_t)src; + + if ((t | (size_t)dst) & wmask) { + if ((t ^ (size_t)dst) & wmask || length <= wsize) { t = length; - else - t &= wmask; + TLOOP1(*--dst = *--src); + goto done; + } + + t &= wmask; length -= t; TLOOP1(*--dst = *--src); } t = length / wsize; - TLOOP(src -= wsize; dst -= wsize; - *(word *)(void *)dst = *(const word *)(const void *)src); + TLOOP(dst -= wsize; src -= wsize; + *(word *)dst = *(const word *)src); t = length & wmask; TLOOP(*--dst = *--src); } +#endif done: #if defined(MEMCOPY) || defined(MEMMOVE) return (dst0); diff --git a/sys/libkern/bcopy.c b/sys/libkern/bcopy.c --- a/sys/libkern/bcopy.c +++ b/sys/libkern/bcopy.c @@ -57,7 +57,7 @@ * sizeof(word) MUST BE A POWER OF TWO * SO THAT wmask BELOW IS ALL ONES */ -typedef long word; /* "word" used for optimal copy speed */ +typedef size_t word; /* "word" used for optimal copy speed */ #define wsize sizeof(word) #define wmask (wsize - 1) @@ -65,21 +65,17 @@ /* * Copy a block of memory, handling overlap. * This is the routine that actually implements - * (the portable versions of) bcopy, memcpy, and memmove. + * (the portable version of) memcpy. */ void * -memcpy(void *dst0, const void *src0, size_t length) +memcpy(void * __restrict dst0, const void * __restrict src0, size_t length) { - char *dst; - const char *src; - size_t t; + if (__predict_false(length == 0)) + return (dst0); - dst = dst0; - src = src0; - - if (length == 0 || dst == src) { /* nothing to do */ - goto done; - } + u_char *dst = (u_char *)dst0; + const u_char *src = (const u_char *)src0; + size_t t; /* * Macros: loop-t-times; and loop-t-times, t>0 @@ -87,23 +83,68 @@ #define TLOOP(s) if (t) TLOOP1(s) #define TLOOP1(s) do { s; } while (--t) - if ((unsigned long)dst < (unsigned long)src) { + t = (size_t)src; /* only need low bits */ + + if ((t | (size_t)dst) & wmask) { + /* + * Try to align operands. This cannot be done + * unless the low bits match. + */ + if ((t ^ (size_t)dst) & wmask || length < wsize) { + t = length; + TLOOP1(*dst++ = *src++); + return (dst0); + } + + t = wsize - (t & wmask); + length -= t; + TLOOP1(*dst++ = *src++); + } + /* + * Copy whole words, then mop up any trailing bytes. + */ + t = length / wsize; + TLOOP(*(word *)dst = *(const word *)src; dst += wsize; + src += wsize); + t = length & wmask; + TLOOP(*dst++ = *src++); + + return (dst0); +} + +/* + * Copy a block of memory, handling overlap. + * This is the routine that actually implements + * (the portable version of) memmove. + */ +void * +memmove(void *dst0, const void *src0, size_t length) +{ + if (__predict_false(length == 0)) + return (dst0); + + u_char *dst = (u_char *)dst0; + const u_char *src = (const u_char *)src0; + size_t t; + + if (dst < src) { /* * Copy forward. */ t = (size_t)src; /* only need low bits */ - if ((t | (uintptr_t)dst) & wmask) { + if ((t | (size_t)dst) & wmask) { /* * Try to align operands. This cannot be done * unless the low bits match. */ - if ((t ^ (uintptr_t)dst) & wmask || length < wsize) { + if ((t ^ (size_t)dst) & wmask || length < wsize) { t = length; - } else { - t = wsize - (t & wmask); + TLOOP1(*dst++ = *src++); + return (dst0); } + t = wsize - (t & wmask); length -= t; TLOOP1(*dst++ = *src++); } @@ -111,8 +152,8 @@ * Copy whole words, then mop up any trailing bytes. */ t = length / wsize; - TLOOP(*(word *)dst = *(const word *)src; src += wsize; - dst += wsize); + TLOOP(*(word *)dst = *(const word *)src; dst += wsize; + src += wsize); t = length & wmask; TLOOP(*dst++ = *src++); } else { @@ -121,28 +162,29 @@ * Alignment works as before, except that it takes * (t&wmask) bytes to align, not wsize-(t&wmask). */ - src += length; dst += length; - t = (uintptr_t)src; + src += length; + t = (size_t)src; - if ((t | (uintptr_t)dst) & wmask) { - if ((t ^ (uintptr_t)dst) & wmask || length <= wsize) { + if ((t | (size_t)dst) & wmask) { + if ((t ^ (size_t)dst) & wmask || length < wsize) { t = length; - } else { - t &= wmask; + TLOOP1(*--dst = *--src); + return (dst0); } - + + t &= wmask; length -= t; TLOOP1(*--dst = *--src); } t = length / wsize; - TLOOP(src -= wsize; dst -= wsize; + TLOOP(dst -= wsize; src -= wsize; *(word *)dst = *(const word *)src); t = length & wmask; TLOOP(*--dst = *--src); } -done: +#undef TLOOP +#undef TLOOP1 + return (dst0); } - -__strong_reference(memcpy, memmove);