D15118.id41587.diff
Index: lib/libc/powerpc64/string/Makefile.inc
===================================================================
--- /dev/null
+++ lib/libc/powerpc64/string/Makefile.inc
@@ -0,0 +1,6 @@
+# $FreeBSD$
+
+MDSRCS+= \
+ bcopy.S \
+ memcpy.S \
+ memmove.S
Index: lib/libc/powerpc64/string/bcopy.S
===================================================================
--- /dev/null
+++ lib/libc/powerpc64/string/bcopy.S
@@ -0,0 +1,351 @@
+/*-
+ * Copyright (c) 2018 Instituto de Pesquisas Eldorado
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of its contributors may
+ * be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <machine/asm.h>
+__FBSDID("$FreeBSD$");
+
+#if 0
+ RCSID("$NetBSD: bcopy.S,v 1.0 2018/03/22 13:37:42 lffpires Exp $")
+#endif
+
+// CPU version definitions
+#include <machine/spr.h>
+
+#define BCOPY_ALIGNMENT_BYTES 16
+#define BCOPY_ALIGNMENT_MASK (BCOPY_ALIGNMENT_BYTES - 1)
+
+#define BCOPY_BLOCK_SIZE_BITS 6
+#define BCOPY_BLOCK_SIZE (1 << BCOPY_BLOCK_SIZE_BITS)
+#define BCOPY_BLOCK_SIZE_MASK (BCOPY_BLOCK_SIZE - 1)
+
+#define BCOPY_BLOCK_COPY_THRESHOLD 512
+
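+// multi-phase copy (len >= BCOPY_BLOCK_COPY_THRESHOLD):
+//   phase 1: single bytes until src reaches 16-byte alignment
+//   phase 2: 64-byte blocks, as four 16-byte vector ops (or eight 8-byte
+//            integer ops when VSX is unavailable)
+//   phase 3: the remaining tail, via the single-phase code
+// shorter copies take the single-phase path alone: 16-byte chunks, then
+// single bytes. both paths run forward or backward as required
+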
+#define LXVD2X(xt, ra, rb) .long ((31 << 26) | ((xt & 0x1f) << 21) | ((ra & 0x1f) << 16) | ((rb & 0x1f) << 11) | (844 << 1) | ((xt & 0x20) >> 5))
+#define STXVD2X(xs, ra, rb) .long ((31 << 26) | ((xs & 0x1f) << 21) | ((ra & 0x1f) << 16) | ((rb & 0x1f) << 11) | (972 << 1) | ((xs & 0x20) >> 5))
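+
+// lxvd2x/stxvd2x are hand-encoded, presumably so this file still assembles
+// with toolchains lacking VSX mnemonics: primary opcode 31, extended
+// opcode 844/972 in bits 21-30, and the high bit of the 6-bit VSR number
+// in bit 31 (XX1 instruction form)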
+
+ .globl HIDENAME(powerpc64_has_vsx)
+
+#ifdef MEMCOPY
+ENTRY(memcpy)
+#else
+#ifdef MEMMOVE
+ENTRY(memmove)
+#else
+ .section ".got","aw"
+ .align 3
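+// cached VSX probe result: -1 = not probed yet, 0 = no VSX, 1 = VSX present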
+HIDENAME(powerpc64_has_vsx):
+ .llong -1
+
+ENTRY(bcopy)
+#endif
+#endif
+ cmpld %r3, %r4 // src == dst? if so, nothing to do
+ beqlr- %cr0
+
+#if defined(MEMCOPY) || defined(MEMMOVE)
+ std %r3, -8(%r1) // save dst
+#else
+ mr %r6, %r3
+ mr %r3, %r4
+ mr %r4, %r6
+#endif
+
+ cmpldi %r5, BCOPY_BLOCK_COPY_THRESHOLD // len >= BCOPY_BLOCK_COPY_THRESHOLD?
+ bge %cr0, .Lmulti_phase // if so, go to multi-phase
+
+ // set up single-phase copy parameters
+.Lsingle_phase_setup:
+ cmpd %cr0, %r4, %r3 // forward or backward copy?
+ blt .Lbackward_single_copy
+
+ // forward copy
+ li %r0, 1 // increment for single phase 1-byte
+ li %r8, 16 // increment for single phase 16-byte
+ li %r9, 0 // pre-adjustment for single phase 16-byte
+
+ b .Lsingle_phase
+
+.Lbackward_single_copy:
+ // backward copy
+ li %r0, -1 // increment for single phase 1-byte
+ li %r8, -16 // increment for single phase 16-byte
+ li %r9, -15 // pre/post adjustment for single phase 16-byte
+ add %r6, %r5, %r0 // %r6 = len - 1
+ add %r3, %r3, %r6 // advance to the last position in dst
+ add %r4, %r4, %r6 // advance to the last position in src
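+ // src/dst now point at the last byte to copy; the -15 pre-adjustment
+ // in %r9 lets the 16-byte loop keep addressing offsets 0 and 8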
+
+.Lsingle_phase:
+ srdi. %r6, %r5, 4 // number of 16-byte chunks
+ beq .Lsingle_1
+
+ add %r3, %r3, %r9 // pre-adjustment
+ add %r4, %r4, %r9 // pre-adjustment
+
+ mtctr %r6
+ .align 5
+.Lsingle_16_loop:
+ ld %r6, 0(%r4)
+ ld %r7, 8(%r4)
+ add %r4, %r4, %r8
+ std %r6, 0(%r3)
+ std %r7, 8(%r3)
+ add %r3, %r3, %r8
+ bdnz .Lsingle_16_loop
+
+ sub %r3, %r3, %r9 // post-adjustment
+ sub %r4, %r4, %r9 // post-adjustment
+
+.Lsingle_1:
+ andi. %r6, %r5, 0x0f // number of remaining single bytes
+ beq .Ldone // no bytes left? if so, done
+
+ mtctr %r6
+ .align 5
+.Lsingle_1_loop:
+ lbz %r6, 0(%r4)
+ add %r4, %r4, %r0 // increment
+ stb %r6, 0(%r3)
+ add %r3, %r3, %r0 // increment
+
+ bdnz .Lsingle_1_loop
+
+.Ldone:
+ // done copying
+
+#if defined(MEMCOPY) || defined(MEMMOVE)
+ ld %r3, -8(%r1) // restore dst
+#endif
+ blr
+
+
+.Lmulti_phase:
+ // set up multi-phase copy parameters
+ andi. %r6, %r4, BCOPY_ALIGNMENT_MASK
+
+ subfic %r7, %r6, BCOPY_ALIGNMENT_BYTES
+ andi. %r7, %r7, BCOPY_ALIGNMENT_MASK // %r7 = bytes before the aligned section of the buffer
+ sub %r8, %r5, %r7 // %r8 = number of bytes in and after the aligned section of the buffer
+ andi. %r9, %r8, BCOPY_BLOCK_SIZE_MASK // %r9 = number of bytes after the aligned section of the buffer
+ srdi %r10, %r8, BCOPY_BLOCK_SIZE_BITS // %r10 = number of BLOCKS in the aligned section of the buffer
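+ // head (%r7) / blocks (%r10) / tail (%r9) are split around the
+ // 16-byte-aligned portion of src; the copy direction decides whether
+ // the head or the tail is copied in phase 1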
+
+ cmpd %cr0, %r4, %r3 // forward or backward copy?
+ blt .Lbackward_multi_copy
+
+ // set up forward copy parameters
+ std %r7, -32(%r1) // number of bytes to copy in phase 1
+ std %r9, -48(%r1) // number of bytes to copy in phase 3
+ std %r10, -40(%r1) // number of BLOCKS to copy in phase 2
+
+ li %r0, 1 // increment for phases 1 and 3
+ li %r5, BCOPY_BLOCK_SIZE // increment for phase 2
+
+ li %r7, 0 // offset for op 1 of phase 2
+ li %r8, 16 // offset for op 2 of phase 2
+ li %r9, 32 // offset for op 3 of phase 2
+ li %r10, 48 // offset for op 4 of phase 2
+
+ std %r8, -16(%r1) // increment for single phase 16-byte (16)
+ std %r7, -24(%r1) // pre/post adjustment for single phase 16-byte (0)
+
+ b .Lphase1
+
+.Lbackward_multi_copy:
+ // set up backward copy parameters
+ std %r7, -48(%r1) // number of bytes to copy in phase 3
+ std %r9, -32(%r1) // number of bytes to copy in phase 1
+ std %r10, -40(%r1) // number of BLOCKS to copy in phase 2
+
+ li %r0, -1 // increment for phases 1 and 3
+ add %r6, %r5, %r0 // %r6 = len - 1
+ add %r3, %r3, %r6 // advance to the last position in dst
+ add %r4, %r4, %r6 // advance to the last position in src
+ li %r5, -BCOPY_BLOCK_SIZE // increment for phase 2
+
+ li %r7, -15 // offset for op 1 of phase 2
+ li %r8, -31 // offset for op 2 of phase 2
+ li %r9, -47 // offset for op 3 of phase 2
+ li %r10, -63 // offset for op 4 of phase 2
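+ // with src/dst at the last byte, each negative offset addresses the
+ // 16-byte chunk covering bytes [off, off+15] of the 64-byte block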
+
+ add %r6, %r7, %r0 // %r6 = -16
+ std %r6, -16(%r1) // increment for single phase 16-byte (-16)
+ std %r7, -24(%r1) // pre/post adjustment for single phase 16-byte (-15)
+
+.Lphase1:
+ ld %r6, -32(%r1) // number of bytes to copy in phase 1
+ cmpldi %r6, 0 // %r6 == 0? (if so, nothing to copy in phase 1)
+ beq+ %cr0, .Lphase2
+
+ mtctr %r6
+ .align 5
+.Lphase1_loop:
+ lbz %r6, 0(%r4)
+ add %r4, %r4, %r0 // phase 1 increment
+ stb %r6, 0(%r3)
+ add %r3, %r3, %r0 // phase 1 increment
+
+ bdnz .Lphase1_loop
+
+.Lphase2:
+ ld %r6, -40(%r1) // number of BLOCKS to copy in phase 2
+ cmpldi %r6, 0 // %r6 == 0? (if so, nothing to copy in phase 2)
+ beq %cr0, .Lphase3
+
+ // check for VSX support (to be replaced by an ifunc resolver once
+ // ifunc support becomes available)
+ ld %r6, HIDENAME(powerpc64_has_vsx)@toc(%r2)
+ cmpdi %r6, 0
+ bgt+ .Lphase2_vsx // has VSX support
+ beq+ .Lphase2_no_vsx // no VSX support
+
+ // the detection code has not run yet. run it now
+
+ mfpvr %r6 // load processor version register
+ srdi %r6, %r6, 16 // we're only interested in the version
+
+ cmpdi %r6, IBMPOWER7
+ beq .Lphase2_vsx_check_has_vsx
+ cmpdi %r6, IBMPOWER7PLUS
+ beq .Lphase2_vsx_check_has_vsx
+ cmpdi %r6, IBMPOWER8
+ beq .Lphase2_vsx_check_has_vsx
+ cmpdi %r6, IBMPOWER8E
+ beq .Lphase2_vsx_check_has_vsx
+ cmpdi %r6, IBMPOWER9
+ beq .Lphase2_vsx_check_has_vsx
+
+ // no VSX support
+ li %r6, 0
+ std %r6, HIDENAME(powerpc64_has_vsx)@toc(%r2)
+ b .Lphase2_no_vsx
+
+.Lphase2_vsx_check_has_vsx:
+ // VSX is supported
+ li %r6, 1
+ std %r6, HIDENAME(powerpc64_has_vsx)@toc(%r2)
+
+.Lphase2_vsx:
+ ld %r6, -40(%r1) // number of BLOCKS to copy in phase 2
+ mtctr %r6
+ .align 5
+.Lphase2_vsx_loop:
+ LXVD2X(6, 7, 4) // lxvd2x %vs6, %r7, %r4
+ LXVD2X(7, 8, 4) // lxvd2x %vs7, %r8, %r4
+ LXVD2X(8, 9, 4) // lxvd2x %vs8, %r9, %r4
+ LXVD2X(9, 10, 4) // lxvd2x %vs9, %r10, %r4
+ STXVD2X(6, 7, 3) // stxvd2x %vs6, %r7, %r3
+ STXVD2X(7, 8, 3) // stxvd2x %vs7, %r8, %r3
+ STXVD2X(8, 9, 3) // stxvd2x %vs8, %r9, %r3
+ STXVD2X(9, 10, 3) // stxvd2x %vs9, %r10, %r3
+
+ add %r4, %r4, %r5 // phase 2 increment
+ add %r3, %r3, %r5 // phase 2 increment
+
+ // %r5 remains the phase 2 increment for every iteration; phase 3
+ // reloads it once the loop ends
+
+ bdnz .Lphase2_vsx_loop
+
+.Lphase3:
+ // load registers for transitioning into the single-phase logic
+ ld %r5, -48(%r1) // number of bytes to copy in phase 3
+ ld %r8, -16(%r1) // increment for single phase 16-byte
+ ld %r9, -24(%r1) // pre/post adjustment for single phase 16-byte
+ b .Lsingle_phase
+
+.Lphase2_no_vsx:
+ // save registers
+ std %r14, -56(%r1)
+ std %r15, -64(%r1)
+ std %r16, -72(%r1)
+ std %r17, -80(%r1)
+ std %r18, -88(%r1)
+ std %r19, -96(%r1)
+ std %r20, -104(%r1)
+ std %r21, -112(%r1)
+
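+ // %r18-%r21 = %r7-%r10 + 8: the eight offsets cover the same four
+ // 16-byte chunks as the VSX path, moved as 8-byte halves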
+ addi %r18, %r7, 8
+ addi %r19, %r8, 8
+ addi %r20, %r9, 8
+ addi %r21, %r10, 8
+
+ ld %r6, -40(%r1) // number of BLOCKS to copy in phase 2
+ mtctr %r6
+ .align 5
+.Lphase2_no_vsx_loop:
+ ldx %r14, %r7, %r4
+ ldx %r15, %r18, %r4
+ ldx %r16, %r8, %r4
+ ldx %r17, %r19, %r4
+ stdx %r14, %r7, %r3
+ stdx %r15, %r18, %r3
+ stdx %r16, %r8, %r3
+ stdx %r17, %r19, %r3
+
+ ldx %r14, %r9, %r4
+ ldx %r15, %r20, %r4
+ ldx %r16, %r10, %r4
+ ldx %r17, %r21, %r4
+ stdx %r14, %r9, %r3
+ stdx %r15, %r20, %r3
+ stdx %r16, %r10, %r3
+ stdx %r17, %r21, %r3
+
+ add %r4, %r4, %r5 // phase 2 increment
+ add %r3, %r3, %r5 // phase 2 increment
+
+ // %r5 remains the phase 2 increment for every iteration; phase 3
+ // reloads it once the loop ends
+
+ bdnz .Lphase2_no_vsx_loop
+
+ // restore registers
+ ld %r14, -56(%r1)
+ ld %r15, -64(%r1)
+ ld %r16, -72(%r1)
+ ld %r17, -80(%r1)
+ ld %r18, -88(%r1)
+ ld %r19, -96(%r1)
+ ld %r20, -104(%r1)
+ ld %r21, -112(%r1)
+
+ // load registers for transitioning into the single-phase logic
+ ld %r5, -48(%r1) // number of bytes to copy in phase 3
+ ld %r8, -16(%r1) // increment for single phase 16-byte
+ ld %r9, -24(%r1) // pre/post adjustment for single phase 16-byte
+ b .Lsingle_phase
+
+#ifdef MEMCOPY
+END(memcpy)
+#else
+#ifdef MEMMOVE
+END(memmove)
+#else
+END(bcopy)
+#endif
+#endif
+
+ .section .note.GNU-stack,"",%progbits
+
Index: lib/libc/powerpc64/string/memcpy.S
===================================================================
--- /dev/null
+++ lib/libc/powerpc64/string/memcpy.S
@@ -0,0 +1,5 @@
+/* $NetBSD: memcpy.S,v 1.1 2001/06/19 00:25:05 fvdl Exp $ */
+/* $FreeBSD$ */
+
+#define MEMCOPY
+#include "bcopy.S"
Index: lib/libc/powerpc64/string/memmove.S
===================================================================
--- /dev/null
+++ lib/libc/powerpc64/string/memmove.S
@@ -0,0 +1,5 @@
+/* $NetBSD: memmove.S,v 1.1 2001/06/19 00:25:05 fvdl Exp $ */
+/* $FreeBSD$ */
+
+#define MEMMOVE
+#include "bcopy.S"
