Changeset View
Standalone View
lib/libc/powerpc64/string/ppc64_bcopy.c
- This file was added.
/*- | |||||
* Copyright (c) 2018 Instituto de Pesquisas Eldorado | |||||
* All rights reserved. | |||||
* | |||||
* Redistribution and use in source and binary forms, with or without | |||||
* modification, are permitted provided that the following conditions | |||||
* are met: | |||||
* 1. Redistributions of source code must retain the above copyright | |||||
* notice, this list of conditions and the following disclaimer. | |||||
* 2. Redistributions in binary form must reproduce the above copyright | |||||
* notice, this list of conditions and the following disclaimer in the | |||||
* documentation and/or other materials provided with the distribution. | |||||
* 3. Neither the name of the author nor the names of its contributors may | |||||
* be used to endorse or promote products derived from this software | |||||
* | |||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND | |||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | |||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |||||
* SUCH DAMAGE. | |||||
* | |||||
*/ | |||||
#include <sys/cdefs.h> | |||||
__FBSDID("$FreeBSD$"); | |||||
#include <sys/types.h> | |||||
#include <sys/auxv.h> | |||||
#include <machine/cpu.h> | |||||
#include <machine/atomic.h> | |||||
#include <stdlib.h> | |||||
#ifdef MEMCOPY | |||||
extern int bcopy_has_vsx; | |||||
extern void* memcpy_plain(void *dst, const void *src, size_t len); | |||||
extern void* memcpy_vsx(void *dst, const void *src, size_t len); | |||||
void* memcpy(void *dst, const void *src, size_t len) | |||||
#elif defined(MEMMOVE) | |||||
extern int bcopy_has_vsx; | |||||
extern void* bcopy_plain(void *dst, const void *src, size_t len); | |||||
jhibbits: These should be combined to '#elif defined()' instead. Makes it read a little cleaner. In the… | |||||
extern void* bcopy_vsx(void *dst, const void *src, size_t len); | |||||
void* memmove(void *dst, const void *src, size_t len) | |||||
#else | |||||
int bcopy_has_vsx = -1; | |||||
extern void* bcopy_plain(void *dst, const void *src, size_t len); | |||||
extern void* bcopy_vsx(void *dst, const void *src, size_t len); | |||||
void bcopy(const void *src, void *dst, size_t len) | |||||
#endif | |||||
{ | |||||
/* XXX: all of this should be replaced with ifunc code once it's available */ | |||||
if (bcopy_has_vsx < 0) { | |||||
/* | |||||
* Initialize bcopy_has_vsx to 0, at least until elf_aux_info() returns. | |||||
* Otherwise, if elf_aux_info() calls bcopy/memcpy/memmove, we would enter an infinite loop. | |||||
*/ | |||||
if (atomic_cmpset_int(&bcopy_has_vsx, -1, 0) != 0) { | |||||
u_long hwcap; | |||||
Done Inline Actions@nwhitehorn, I just realized I can't call sysctlbyname() here, because it relies on bcopy/memcpy/memmove. Thanks, lffpires_ruabrasil.org: @nwhitehorn, I just realized I can't call sysctlbyname() here, because it relies on… | |||||
Done Inline ActionsCan't you just temporarily set bcopy_has_vsx = 0 until this if block finishes? nwhitehorn: Can't you just temporarily set bcopy_has_vsx = 0 until this if block finishes? | |||||
Done Inline ActionsGreat idea! After doing that, I was still getting an abort signal, but the root cause was in a completely different area - the stack protection initialization in libc. I'll post a candidate fix for that in a separate review. lffpires_ruabrasil.org: Great idea! After doing that, I was still getting an abort signal, but the root cause was in a… | |||||
Done Inline ActionsIt looks like this needs to be added in the kernel. The kernel automatically exports AT_HWCAP if it exists in the sysentvec struct (see sys/powerpc/powerpc/elf64_machdep.c and sys/kern/imgact_elf.c), so if we simply add it to the structure, the way ARM does, that should be sufficient, and you can use elf_aux_info() to retrieve the capability. jhibbits: It looks like this needs to be added in the kernel. The kernel automatically exports AT_HWCAP… | |||||
Done Inline ActionsThanks for adding that to the kernel! In addition to setting bcopy_has_vsx to 0, as @nwhitehorn suggested and fixing the stack protection initialization, I'm now using elf_aux_info() to get AT_HWCAP. lffpires_ruabrasil.org: Thanks for adding that to the kernel! In addition to setting bcopy_has_vsx to 0, as @nwhitehorn… | |||||
if (elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap)) == 0 && | |||||
(hwcap & PPC_FEATURE_HAS_VSX) != 0) { | |||||
atomic_set_int(&bcopy_has_vsx, 1); | |||||
} | |||||
} | |||||
} | |||||
if (bcopy_has_vsx > 0) { | |||||
/* VSX is supported */ | |||||
#if defined(MEMCOPY) | |||||
return memcpy_vsx(dst, src, len); | |||||
#elif defined(MEMMOVE) | |||||
return bcopy_vsx(dst, src, len); | |||||
#else | |||||
bcopy_vsx(dst, src, len); | |||||
#endif | |||||
} else { | |||||
/* VSX is not supported */ | |||||
#if defined(MEMCOPY) | |||||
return memcpy_plain(dst, src, len); | |||||
#elif defined(MEMMOVE) | |||||
Done Inline ActionsWhy not merge these into just a single call to memmove_vsx()? The only difference between bcopy() and memmove() is the order of src/dst. jhibbits: Why not merge these into just a single call to memmove_vsx()? The only difference between… | |||||
Done Inline ActionsMakes sense. I suggest merging only memcpy+memmove (whose implementations are exactly the same), and still leaving bcopy separate, though. The reason for keeping bcopy separate is that, once we have ifunc support for powerpc64, we'll be ready to just replace the functions in this file with the resolver functions for memcpy, memmove and bcopy. What do you think? lffpires_ruabrasil.org: Makes sense. I suggest merging only memcpy+memmove (whose implementations are exactly the same)… | |||||
Done Inline ActionsI meant merge memmove_vsx and bcopy_vsx into memmove_vsx, with bcopy simply calling memmove_vsx() with the swapped arguments. Same below with memmove_plain/bcopy_plain. If memcopy_vsx is identical to memmove_vsx, then they could be merged as well, to simplify the code path even further. jhibbits: I meant merge memmove_vsx and bcopy_vsx into memmove_vsx, with bcopy simply calling memmove_vsx… | |||||
Done Inline ActionsI was analyzing this and it is possible to simplify removing memcpy.S, memcpy_vsx.S, memmove.S, memmove_vsx.S, ppc64_memcpy.c and ppc64_memmove.c, leaving only the functions bcopy_vsx and bcopy_plain for all cases (memcpy, memmove, bcopy). @jhibbits, was it your intention? leonardo.bianconi_eldorado.org.br: I was analyzing this and it is possible to simplify removing `memcpy.S`, `memcpy_vsx.S`… | |||||
Done Inline ActionsYeah, that's more what I was thinking. Bear in mind that memcpy() is intended to be fast while memmove() is intended to be "correct", so we might want to keep memcpy() as separate from bcopy()/memmove() (which perform the exact same function as each other, with swapped arguments). I did just check the man page for memcpy(3), which shows that it's implemented in terms of bcopy(), so I guess it's not a major concern. However, it'd be interesting to see if there's a performance advantage to ignoring overlap. jhibbits: Yeah, that's more what I was thinking.
Bear in mind that memcpy() is intended to be fast while… | |||||
Done Inline ActionsI have simplified the path related with memmove, as it is the same of bcopy, but still need to have three cases, as the two in the last differ on returning the value. leonardo.bianconi_eldorado.org.br: I have simplified the path related with memmove, as it is the same of bcopy, but still need to… | |||||
return bcopy_plain(dst, src, len); | |||||
#else | |||||
bcopy_plain(dst, src, len); | |||||
#endif | |||||
} | |||||
} |
These should be combined to '#elif defined()' instead. Makes it read a little cleaner. In the other places in this file and the other file as well.