Changeset View
Standalone View
lib/libc/string/bcopy.c
Show All 34 Lines | |||||
#if defined(LIBC_SCCS) && !defined(lint) | #if defined(LIBC_SCCS) && !defined(lint) | ||||
static char sccsid[] = "@(#)bcopy.c 8.1 (Berkeley) 6/4/93"; | static char sccsid[] = "@(#)bcopy.c 8.1 (Berkeley) 6/4/93"; | ||||
#endif /* LIBC_SCCS and not lint */ | #endif /* LIBC_SCCS and not lint */ | ||||
#include <sys/cdefs.h> | #include <sys/cdefs.h> | ||||
__FBSDID("$FreeBSD$"); | __FBSDID("$FreeBSD$"); | ||||
#include <sys/types.h> | #include <sys/types.h> | ||||
typedef intptr_t word; /* "word" used for optimal copy speed */ | typedef size_t word; /* "word" used for optimal copy speed */ | ||||
jrtc27: Why? Shouldn't make a difference for FreeBSD as, other than signedness, they should be the same. | |||||
#define wsize sizeof(word) | #define wsize sizeof(word) | ||||
#define wmask (wsize - 1) | #define wmask (wsize - 1) | ||||
/* | /* | ||||
* Copy a block of memory, handling overlap. | * Copy a block of memory, handling overlap. | ||||
* This is the routine that actually implements | * This is the routine that actually implements | ||||
* (the portable versions of) bcopy, memcpy, and memmove. | * (the portable versions of) bcopy, memcpy, and memmove. | ||||
*/ | */ | ||||
#if defined(MEMCOPY) || defined(MEMMOVE) | #if defined(MEMCOPY) || defined(MEMMOVE) | ||||
#include <string.h> | #include <string.h> | ||||
void * | void * | ||||
#ifdef MEMCOPY | #ifdef MEMCOPY | ||||
memcpy | memcpy | ||||
(void * __restrict dst0, const void * __restrict src0, size_t length) | |||||
#else | #else | ||||
memmove | memmove | ||||
#endif | |||||
(void *dst0, const void *src0, size_t length) | (void *dst0, const void *src0, size_t length) | ||||
#endif | |||||
#else | #else | ||||
#include <strings.h> | #include <strings.h> | ||||
void | void | ||||
bcopy(const void *src0, void *dst0, size_t length) | bcopy(const void *src0, void *dst0, size_t length) | ||||
#endif | #endif | ||||
{ | { | ||||
char *dst = dst0; | if (__builtin_expect(length == 0, 0)) | ||||
Done Inline Actions__predict_false? jrtc27: __predict_false? | |||||
Done Inline Actionsyes gfunni234_gmail.com: yes | |||||
Not Done Inline ActionsI mean "use the __predict_false macro rather than writing your own". jrtc27: I mean "use the __predict_false macro rather than writing your own". | |||||
const char *src = src0; | |||||
size_t t; | |||||
if (length == 0 || dst == src) /* nothing to do */ | |||||
goto done; | goto done; | ||||
/* | /* | ||||
* Macros: loop-t-times; and loop-t-times, t>0 | * Macros: loop-t-times; and loop-t-times, t>0 | ||||
*/ | */ | ||||
#define TLOOP(s) if (t) TLOOP1(s) | #define TLOOP(s) for (; t; --t) { s; } | ||||
Done Inline ActionsThis should be entirely equivalent to what was there before, and any modern compiler should be able to optimise that. jrtc27: This should be entirely equivalent to what was there before, and any modern compiler should be… | |||||
Done Inline Actionsyeah it just looks cleaner gfunni234_gmail.com: yeah it just looks cleaner | |||||
Not Done Inline ActionsThat's not a good reason to change something. Both are equivalent semantically, both get optimised to the same thing and both are easy to understand. But whether or not it's cleaner is subjective; personally I prefer what's there because it's shorter and reuses code. jrtc27: That's not a good reason to change something. Both are equivalent semantically, both get… | |||||
Done Inline ActionsAlso reason: tloop1 is not defined when tloop is, so there is no guarantee of it being properly substituted, even though every compiler thus far has been doing so gfunni234_gmail.com: Also reason: tloop1 is not defined when tloop is, so there is no guarantee of it being properly… | |||||
#define TLOOP1(s) do { s; } while (--t) | #define TLOOP1(s) do { s; } while (--t) | ||||
if ((unsigned long)dst < (unsigned long)src) { | u_char *dst = (u_char *)dst0; | ||||
Not Done Inline ActionsMoving these violates style(9) jrtc27: Moving these violates style(9) | |||||
Not Done Inline ActionsWhy use u_char all of a sudden? char works just fine. jrtc27: Why use u_char all of a sudden? char works just fine. | |||||
Done Inline ActionsNeither of these have been addressed. jrtc27: Neither of these have been addressed. | |||||
Done Inline Actionsunsigned char is always a byte per the c standard. Strangely, it is not guaranteed for char, despite it being that way on pretty much every arch. It feels wrong to operate on the char when the function is about raw memory, not string chars. gfunni234_gmail.com: unsigned char is always a byte per the c standard. Strangely, it is not guaranteed for char… | |||||
const u_char *src = (const u_char *)src0; | |||||
size_t t; | |||||
Done Inline ActionsUsing size_t here is actually good for CHERI, we changed it downstream as otherwise you have uintptr_t | uintptr_t later, the provenance for which is ambiguous, but we don't actually need a pointer, just the low bits of it. jrtc27: Using size_t here is actually good for CHERI, we changed it downstream as otherwise you have… | |||||
#ifndef MEMCOPY | |||||
Done Inline ActionsI would prefer to split the cosmetic changes to this function from the functional ones. E.g, have one commit that adds the __restrict and *only* adds this #ifndef, then later the all the switching around of dst and src, and all the other cosmetics. dim: I would prefer to split the cosmetic changes to this function from the functional ones. E.g… | |||||
if (dst < src) { | |||||
#endif | |||||
/* | /* | ||||
* Copy forward. | * Copy forward. | ||||
*/ | */ | ||||
t = (uintptr_t)src; /* only need low bits */ | t = (size_t)src; /* only need low bits */ | ||||
if ((t | (uintptr_t)dst) & wmask) { | if ((t | (size_t)dst) & wmask) { | ||||
Not Done Inline ActionsWhy the blank line? jrtc27: Why the blank line? | |||||
/* | /* | ||||
* Try to align operands. This cannot be done | * Try to align operands. This cannot be done | ||||
* unless the low bits match. | * unless the low bits match. | ||||
*/ | */ | ||||
if ((t ^ (uintptr_t)dst) & wmask || length < wsize) | if ((t ^ (size_t)dst) & wmask || length < wsize) { | ||||
t = length; | t = length; | ||||
else | TLOOP1(*dst++ = *src++); | ||||
goto done; | |||||
} | |||||
t = wsize - (t & wmask); | t = wsize - (t & wmask); | ||||
length -= t; | length -= t; | ||||
TLOOP1(*dst++ = *src++); | TLOOP1(*dst++ = *src++); | ||||
} | } | ||||
/* | /* | ||||
* Copy whole words, then mop up any trailing bytes. | * Copy whole words, then mop up any trailing bytes. | ||||
*/ | */ | ||||
t = length / wsize; | t = length / wsize; | ||||
TLOOP(*(word *)(void *)dst = *(const word *)(const void *)src; | TLOOP(*(word *)dst = *(const word *)src; dst += wsize; | ||||
src += wsize; dst += wsize); | src += wsize); | ||||
Done Inline ActionsWhy move dst and src around? jrtc27: Why move dst and src around? | |||||
Done Inline ActionsIt reads better since src comes after dst gfunni234_gmail.com: It reads better since src comes after dst | |||||
Not Done Inline ActionsSubjective. Also it was better when they were both on the same line, rather than one hiding at the end of the previous long line. Don't change code because you can. jrtc27: Subjective. Also it was better when they were both on the same line, rather than one hiding at… | |||||
Done Inline ActionsDon't think of it in terms of changes, but think of the code how it IS. Especially when the other code has dst FIRST and then src. Consistency is better even in small cases like this gfunni234_gmail.com: Don't think of it in terms of changes, but think of the code how it IS.
Especially when the… | |||||
t = length & wmask; | t = length & wmask; | ||||
TLOOP(*dst++ = *src++); | TLOOP(*dst++ = *src++); | ||||
#ifndef MEMCOPY | |||||
} else { | } else { | ||||
/* | /* | ||||
* Copy backwards. Otherwise essentially the same. | * Copy backwards. Otherwise essentially the same. | ||||
* Alignment works as before, except that it takes | * Alignment works as before, except that it takes | ||||
* (t&wmask) bytes to align, not wsize-(t&wmask). | * (t&wmask) bytes to align, not wsize-(t&wmask). | ||||
*/ | */ | ||||
src += length; | |||||
dst += length; | dst += length; | ||||
t = (uintptr_t)src; | src += length; | ||||
if ((t | (uintptr_t)dst) & wmask) { | t = (size_t)src; | ||||
Not Done Inline ActionsWhy move? jrtc27: Why move? | |||||
if ((t ^ (uintptr_t)dst) & wmask || length <= wsize) | |||||
if ((t | (size_t)dst) & wmask) { | |||||
Not Done Inline ActionsWhy the blank line? jrtc27: Why the blank line? | |||||
if ((t ^ (size_t)dst) & wmask || length <= wsize) { | |||||
t = length; | t = length; | ||||
Done Inline Actions< has become <=, I assume because you copied it from the forward case, and I think this now behaves incorrectly for an unaligned-but-by-the-same-amount (e.g. 0x1001 to 0x2001) word-sized memcpy. It works for the forward case because t = wsize - (t & wmask) gives you wsize when t == length, but in the backwards case t &= wmask gives you 0 not length (which, due to the use of TLOOP1, ends up doing SIZE_MAX+1 iterations rather than 0). jrtc27: < has become <=, I assume because you copied it from the forward case, and I think this now… | |||||
else | TLOOP1(*--dst = *--src); | ||||
goto done; | |||||
} | |||||
t &= wmask; | t &= wmask; | ||||
length -= t; | length -= t; | ||||
TLOOP1(*--dst = *--src); | TLOOP1(*--dst = *--src); | ||||
} | } | ||||
t = length / wsize; | t = length / wsize; | ||||
TLOOP(src -= wsize; dst -= wsize; | TLOOP(dst -= wsize; src -= wsize; | ||||
*(word *)(void *)dst = *(const word *)(const void *)src); | *(word *)dst = *(const word *)src); | ||||
Not Done Inline ActionsWhy swap the order? jrtc27: Why swap the order? | |||||
t = length & wmask; | t = length & wmask; | ||||
TLOOP(*--dst = *--src); | TLOOP(*--dst = *--src); | ||||
} | } | ||||
#endif | |||||
done: | done: | ||||
#if defined(MEMCOPY) || defined(MEMMOVE) | #if defined(MEMCOPY) || defined(MEMMOVE) | ||||
return (dst0); | return (dst0); | ||||
Done Inline ActionsThere's nothing else in the file, seems a bit unnecessary jrtc27: There's nothing else in the file, seems a bit unnecessary | |||||
#else | #else | ||||
return; | return; | ||||
#endif | #endif | ||||
} | } |
Why? Shouldn't make a difference for FreeBSD as, other than signedness, they should be the same. For CheriBSD we need to use a different type, intcap_t (which is what intptr_t is for pure-capability code so the existing code _almost_ works, except for hybrid code intptr_t remains a 32/64-bit integer, not a capability like intcap_t).