Changeset View
Standalone View
lib/libc/string/bcopy.c
Context not available. | |||||
#include <sys/types.h> | #include <sys/types.h> | ||||
typedef intptr_t word; /* "word" used for optimal copy speed */ | typedef size_t word; /* "word" used for optimal copy speed */ | ||||
jrtc27: Why? Shouldn't make a difference for FreeBSD as, other than signedness, they should be the same. | |||||
#define wsize sizeof(word) | #define wsize sizeof(word) | ||||
#define wmask (wsize - 1) | #define wmask (wsize - 1) | ||||
Context not available. | |||||
void * | void * | ||||
#ifdef MEMCOPY | #ifdef MEMCOPY | ||||
memcpy | memcpy | ||||
(void * __restrict dst0, const void * __restrict src0, size_t length) | |||||
#else | #else | ||||
memmove | memmove | ||||
#endif | |||||
(void *dst0, const void *src0, size_t length) | (void *dst0, const void *src0, size_t length) | ||||
#endif | |||||
#else | #else | ||||
#include <strings.h> | #include <strings.h> | ||||
Context not available. | |||||
bcopy(const void *src0, void *dst0, size_t length) | bcopy(const void *src0, void *dst0, size_t length) | ||||
#endif | #endif | ||||
{ | { | ||||
char *dst = dst0; | if (__builtin_expect(length == 0, 0)) | ||||
jrtc27Unsubmitted Done Inline Actions__predict_false? jrtc27: __predict_false? | |||||
gfunni234_gmail.comAuthorUnsubmitted Done Inline Actionsyes gfunni234_gmail.com: yes | |||||
jrtc27Unsubmitted Not Done Inline ActionsI mean "use the __predict_false macro rather than writing your own". jrtc27: I mean "use the __predict_false macro rather than writing your own". | |||||
const char *src = src0; | |||||
size_t t; | |||||
if (length == 0 || dst == src) /* nothing to do */ | |||||
goto done; | goto done; | ||||
/* | /* | ||||
* Macros: loop-t-times; and loop-t-times, t>0 | * Macros: loop-t-times; and loop-t-times, t>0 | ||||
*/ | */ | ||||
#define TLOOP(s) if (t) TLOOP1(s) | #define TLOOP(s) for (; t; --t) { s; } | ||||
jrtc27Unsubmitted Done Inline ActionsThis should be entirely equivalent to what was there before, and any modern compiler should be able to optimise that. jrtc27: This should be entirely equivalent to what was there before, and any modern compiler should be… | |||||
gfunni234_gmail.comAuthorUnsubmitted Done Inline Actionsyeah it just looks cleaner gfunni234_gmail.com: yeah it just looks cleaner | |||||
jrtc27Unsubmitted Not Done Inline ActionsThat's not a good reason to change something. Both are equivalent semantically, both get optimised to the same thing and both are easy to understand. But whether or not it's cleaner is subjective; personally I prefer what's there because it's shorter and reuses code. jrtc27: That's not a good reason to change something. Both are equivalent semantically, both get… | |||||
gfunni234_gmail.comAuthorUnsubmitted Done Inline ActionsAlso reason: tloop1 is not defined when tloop is, so there is no guarantee of it being properly substituted, even though every compiler thus far has been doing so gfunni234_gmail.com: Also reason: tloop1 is not defined when tloop is, so there is no guarantee of it being properly… | |||||
#define TLOOP1(s) do { s; } while (--t) | #define TLOOP1(s) do { s; } while (--t) | ||||
if ((unsigned long)dst < (unsigned long)src) { | u_char *dst = (u_char *)dst0; | ||||
jrtc27Unsubmitted Not Done Inline ActionsMoving these violates style(9) jrtc27: Moving these violates style(9) | |||||
jrtc27Unsubmitted Not Done Inline ActionsWhy use u_char all of a sudden? char works just fine. jrtc27: Why use u_char all of a sudden? char works just fine. | |||||
jrtc27Unsubmitted Done Inline ActionsNeither of these have been addressed. jrtc27: Neither of these have been addressed. | |||||
gfunni234_gmail.comAuthorUnsubmitted Done Inline Actionsunsigned char is always a byte per the c standard. Strangely, it is not guaranteed for char, despite it being that way on pretty much every arch. It feels wrong to operate on the char when the function is about raw memory, not string chars. gfunni234_gmail.com: unsigned char is always a byte per the c standard. Strangely, it is not guaranteed for char… | |||||
const u_char *src = (const u_char *)src0; | |||||
size_t t; | |||||
jrtc27Unsubmitted Done Inline ActionsUsing size_t here is actually good for CHERI, we changed it downstream as otherwise you have uintptr_t | uintptr_t later, the provenance for which is ambiguous, but we don't actually need a pointer, just the low bits of it. jrtc27: Using size_t here is actually good for CHERI, we changed it downstream as otherwise you have… | |||||
#ifndef MEMCOPY | |||||
dimUnsubmitted Done Inline ActionsI would prefer to split the cosmetic changes to this function from the functional ones. E.g, have one commit that adds the __restrict and *only* adds this #ifndef, then later the all the switching around of dst and src, and all the other cosmetics. dim: I would prefer to split the cosmetic changes to this function from the functional ones. E.g… | |||||
if (dst < src) { | |||||
#endif | |||||
/* | /* | ||||
* Copy forward. | * Copy forward. | ||||
*/ | */ | ||||
t = (uintptr_t)src; /* only need low bits */ | t = (size_t)src; /* only need low bits */ | ||||
if ((t | (uintptr_t)dst) & wmask) { | |||||
jrtc27Unsubmitted Not Done Inline ActionsWhy the blank line? jrtc27: Why the blank line? | |||||
if ((t | (size_t)dst) & wmask) { | |||||
/* | /* | ||||
* Try to align operands. This cannot be done | * Try to align operands. This cannot be done | ||||
* unless the low bits match. | * unless the low bits match. | ||||
*/ | */ | ||||
if ((t ^ (uintptr_t)dst) & wmask || length < wsize) | if ((t ^ (size_t)dst) & wmask || length < wsize) { | ||||
t = length; | t = length; | ||||
else | TLOOP1(*dst++ = *src++); | ||||
t = wsize - (t & wmask); | goto done; | ||||
} | |||||
t = wsize - (t & wmask); | |||||
length -= t; | length -= t; | ||||
TLOOP1(*dst++ = *src++); | TLOOP1(*dst++ = *src++); | ||||
} | } | ||||
Context not available. | |||||
* Copy whole words, then mop up any trailing bytes. | * Copy whole words, then mop up any trailing bytes. | ||||
*/ | */ | ||||
t = length / wsize; | t = length / wsize; | ||||
TLOOP(*(word *)(void *)dst = *(const word *)(const void *)src; | TLOOP(*(word *)dst = *(const word *)src; dst += wsize; | ||||
jrtc27Unsubmitted Done Inline ActionsWhy move dst and src around? jrtc27: Why move dst and src around? | |||||
gfunni234_gmail.comAuthorUnsubmitted Done Inline ActionsIt reads better since src comes after dst gfunni234_gmail.com: It reads better since src comes after dst | |||||
jrtc27Unsubmitted Not Done Inline ActionsSubjective. Also it was better when they were both on the same line, rather than one hiding at the end of the previous long line. Don't change code because you can. jrtc27: Subjective. Also it was better when they were both on the same line, rather than one hiding at… | |||||
gfunni234_gmail.comAuthorUnsubmitted Done Inline ActionsDon't think of it in terms of changes, but think of the code how it IS. Especially when the other code has dst FIRST and then src. Consistency is better even in small cases like this gfunni234_gmail.com: Don't think of it in terms of changes, but think of the code how it IS.
Especially when the… | |||||
src += wsize; dst += wsize); | src += wsize); | ||||
t = length & wmask; | t = length & wmask; | ||||
TLOOP(*dst++ = *src++); | TLOOP(*dst++ = *src++); | ||||
#ifndef MEMCOPY | |||||
} else { | } else { | ||||
/* | /* | ||||
* Copy backwards. Otherwise essentially the same. | * Copy backwards. Otherwise essentially the same. | ||||
* Alignment works as before, except that it takes | * Alignment works as before, except that it takes | ||||
* (t&wmask) bytes to align, not wsize-(t&wmask). | * (t&wmask) bytes to align, not wsize-(t&wmask). | ||||
*/ | */ | ||||
src += length; | |||||
dst += length; | dst += length; | ||||
t = (uintptr_t)src; | src += length; | ||||
jrtc27Unsubmitted Not Done Inline ActionsWhy move? jrtc27: Why move? | |||||
if ((t | (uintptr_t)dst) & wmask) { | t = (size_t)src; | ||||
if ((t ^ (uintptr_t)dst) & wmask || length <= wsize) | |||||
jrtc27Unsubmitted Not Done Inline ActionsWhy the blank line? jrtc27: Why the blank line? | |||||
if ((t | (size_t)dst) & wmask) { | |||||
if ((t ^ (size_t)dst) & wmask || length < wsize) { | |||||
jrtc27Unsubmitted Done Inline Actions< has become <=, I assume because you copied it from the forward case, and I think this now behaves incorrectly for an unaligned-but-by-the-same-amount (e.g. 0x1001 to 0x2001) word-sized memcpy. It works for the forward case because t = wsize - (t & wmask) gives you wsize when t == length, but in the backwards case t &= wmask gives you 0 not length (which, due to the use of TLOOP1, ends up doing SIZE_MAX+1 iterations rather than 0). jrtc27: < has become <=, I assume because you copied it from the forward case, and I think this now… | |||||
t = length; | t = length; | ||||
else | TLOOP1(*--dst = *--src); | ||||
t &= wmask; | goto done; | ||||
} | |||||
t &= wmask; | |||||
length -= t; | length -= t; | ||||
TLOOP1(*--dst = *--src); | TLOOP1(*--dst = *--src); | ||||
} | } | ||||
t = length / wsize; | t = length / wsize; | ||||
TLOOP(src -= wsize; dst -= wsize; | TLOOP(dst -= wsize; src -= wsize; | ||||
jrtc27Unsubmitted Not Done Inline ActionsWhy swap the order? jrtc27: Why swap the order? | |||||
*(word *)(void *)dst = *(const word *)(const void *)src); | *(word *)dst = *(const word *)src); | ||||
t = length & wmask; | t = length & wmask; | ||||
TLOOP(*--dst = *--src); | TLOOP(*--dst = *--src); | ||||
} | } | ||||
#endif | |||||
#undef TLOOP | |||||
#undef TLOOP1 | |||||
jrtc27Unsubmitted Done Inline ActionsThere's nothing else in the file, seems a bit unnecessary jrtc27: There's nothing else in the file, seems a bit unnecessary | |||||
done: | done: | ||||
#if defined(MEMCOPY) || defined(MEMMOVE) | #if defined(MEMCOPY) || defined(MEMMOVE) | ||||
return (dst0); | return (dst0); | ||||
Context not available. |
Why? Shouldn't make a difference for FreeBSD as, other than signedness, they should be the same. For CheriBSD we need to use a different type, intcap_t (which is what intptr_t is for pure-capability code so the existing code _almost_ works, except for hybrid code intptr_t remains a 32/64-bit integer, not a capability like intcap_t).