Index: lib/libc/powerpc64/string/Makefile.inc
===================================================================
--- /dev/null
+++ lib/libc/powerpc64/string/Makefile.inc
@@ -0,0 +1,6 @@
+# $FreeBSD$
+
+MDSRCS+= \
+	bcopy.S \
+	memcpy.S \
+	memmove.S
Index: lib/libc/powerpc64/string/bcopy.S
===================================================================
--- /dev/null
+++ lib/libc/powerpc64/string/bcopy.S
@@ -0,0 +1,351 @@
+/*-
+ * Copyright (c) 2018 Instituto de Pesquisas Eldorado
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of its contributors may
+ *    be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <machine/asm.h>
+__FBSDID("$FreeBSD$");
+
+#if 0
+	RCSID("$NetBSD: bcopy.S,v 1.0 2018/03/22 13:37:42 lffpires Exp $")
+#endif
+
+// CPU version definitions
+#include <machine/spr.h>
+
+#define BCOPY_ALIGNMENT_BYTES		16
+#define BCOPY_ALIGNMENT_MASK		(BCOPY_ALIGNMENT_BYTES - 1)
+
+#define BCOPY_BLOCK_SIZE_BITS		6
+#define BCOPY_BLOCK_SIZE		(1 << BCOPY_BLOCK_SIZE_BITS)
+#define BCOPY_BLOCK_SIZE_MASK		(BCOPY_BLOCK_SIZE - 1)
+
+#define BCOPY_BLOCK_COPY_THRESHOLD	512
+
+#define LXVD2X(xt, ra, rb)	.long ((31 << 26) | ((xt & 0x1f) << 21) | ((ra & 0x1f) << 16) | ((rb & 0x1f) << 11) | (844 << 1) | ((xt & 0x20) >> 5))
+#define STXVD2X(xs, ra, rb)	.long ((31 << 26) | ((xs & 0x1f) << 21) | ((ra & 0x1f) << 16) | ((rb & 0x1f) << 11) | (972 << 1) | ((xs & 0x20) >> 5))
+
+	.globl HIDENAME(powerpc64_has_vsx)
+
+#ifdef MEMCOPY
+ENTRY(memcpy)
+#else
+#ifdef MEMMOVE
+ENTRY(memmove)
+#else
+	.section ".got","aw"
+	.align 3
+HIDENAME(powerpc64_has_vsx):
+	.llong	-1
+
+ENTRY(bcopy)
+#endif
+#endif
+	cmpld	%r3, %r4		// src == dst? if so, nothing to do
+	beqlr-	%cr0
+
+#if defined(MEMCOPY) || defined(MEMMOVE)
+	std	%r3, -8(%r1)		// save dst
+#else
+	mr	%r6, %r3
+	mr	%r3, %r4
+	mr	%r4, %r6
+#endif
+
+	cmpldi	%r5, BCOPY_BLOCK_COPY_THRESHOLD	// len >= BCOPY_BLOCK_COPY_THRESHOLD?
+	bge	%cr0, .Lmulti_phase		// if so, go to multi-phase
+
+	// set up single-phase copy parameters
+.Lsingle_phase_setup:
+	cmpd	%cr0, %r4, %r3		// forward or backward copy?
+	blt	.Lbackward_single_copy
+
+	// forward copy
+	li	%r0, 1			// increment for single phase 1-byte
+	li	%r8, 16			// increment for single phase 16-byte
+	li	%r9, 0			// pre-adjustment for single phase 16-byte
+
+	b	.Lsingle_phase
+
+.Lbackward_single_copy:
+	// backward copy
+	li	%r0, -1			// increment for single phase 1-byte
+	li	%r8, -16		// increment for single phase 16-byte
+	li	%r9, -15		// pre/post adjustment for single phase 16-byte
+	add	%r6, %r5, %r0		// %r6 = len - 1
+	add	%r3, %r3, %r6		// advance to the last position in dst
+	add	%r4, %r4, %r6		// advance to the last position in src
+
+.Lsingle_phase:
+	srdi.	%r6, %r5, 4		// number of 16-bytes
+	beq	.Lsingle_1
+
+	add	%r3, %r3, %r9		// pre-adjustment
+	add	%r4, %r4, %r9		// pre-adjustment
+
+	mtctr	%r6
+	.align 5
+.Lsingle_16_loop:
+	ld	%r6, 0(%r4)
+	ld	%r7, 8(%r4)
+	add	%r4, %r4, %r8
+	std	%r6, 0(%r3)
+	std	%r7, 8(%r3)
+	add	%r3, %r3, %r8
+	bdnz	.Lsingle_16_loop
+
+	sub	%r3, %r3, %r9		// post-adjustment
+	sub	%r4, %r4, %r9		// post-adjustment
+
+.Lsingle_1:
+	andi.	%r6, %r5, 0x0f		// number of 1-bytes
+	beq	.Ldone			// 1-bytes == 0? if so, nothing to do
+
+	mtctr	%r6
+	.align 5
+.Lsingle_1_loop:
+	lbz	%r6, 0(%r4)
+	add	%r4, %r4, %r0		// increment
+	stb	%r6, 0(%r3)
+	add	%r3, %r3, %r0		// increment
+
+	bdnz	.Lsingle_1_loop
+
+.Ldone:
+	// done copying
+
+#if defined(MEMCOPY) || defined(MEMMOVE)
+	ld	%r3, -8(%r1)		// restore dst
+#endif
+	blr
+
+
+.Lmulti_phase:
+	// set up multi-phase copy parameters
+	andi.	%r6, %r4, BCOPY_ALIGNMENT_MASK
+
+	subfic	%r7, %r6, BCOPY_ALIGNMENT_BYTES
+	andi.	%r7, %r7, BCOPY_ALIGNMENT_MASK	// %r7 = bytes before the aligned section of the buffer
+	sub	%r8, %r5, %r7			// %r8 = number of bytes in and after the aligned section of the buffer
+	andi.	%r9, %r8, BCOPY_BLOCK_SIZE_MASK	// %r9 = number of bytes after the aligned section of the buffer
+	srdi	%r10, %r8, BCOPY_BLOCK_SIZE_BITS // %r10 = number of BLOCKS in the aligned section of the buffer
+
+	cmpd	%cr0, %r4, %r3		// forward or backward copy?
+	blt	.Lbackward_multi_copy
+
+	// set up forward copy parameters
+	std	%r7, -32(%r1)		// number of bytes to copy in phase 1
+	std	%r9, -48(%r1)		// number of bytes to copy in phase 3
+	std	%r10, -40(%r1)		// number of BLOCKS to copy in phase 2
+
+	li	%r0, 1			// increment for phases 1 and 3
+	li	%r5, BCOPY_BLOCK_SIZE	// increment for phase 2
+
+	li	%r7, 0			// offset for op 1 of phase 2
+	li	%r8, 16			// offset for op 2 of phase 2
+	li	%r9, 32			// offset for op 3 of phase 2
+	li	%r10, 48		// offset for op 4 of phase 2
+
+	std	%r8, -16(%r1)		// increment for single phase 16-byte (16)
+	std	%r7, -24(%r1)		// pre/post adjustment for single phase 16-byte (0)
+
+	b	.Lphase1
+
+.Lbackward_multi_copy:
+	// set up backward copy parameters
+	std	%r7, -48(%r1)		// number of bytes to copy in phase 3
+	std	%r9, -32(%r1)		// number of bytes to copy in phase 1
+	std	%r10, -40(%r1)		// number of BLOCKS to copy in phase 2
+
+	li	%r0, -1			// increment for phases 1 and 3
+	add	%r6, %r5, %r0		// %r6 = len - 1
+	add	%r3, %r3, %r6		// advance to the last position in dst
+	add	%r4, %r4, %r6		// advance to the last position in src
+	li	%r5, -BCOPY_BLOCK_SIZE	// increment for phase 2
+
+	li	%r7, -15		// offset for op 1 of phase 2
+	li	%r8, -31		// offset for op 2 of phase 2
+	li	%r9, -47		// offset for op 3 of phase 2
+	li	%r10, -63		// offset for op 4 of phase 2
+
+	add	%r6, %r7, %r0		// %r6 = -16
+	std	%r6, -16(%r1)		// increment for single phase 16-byte (-16)
+	std	%r7, -24(%r1)		// pre/post adjustment for single phase 16-byte (-15)
+
+.Lphase1:
+	ld	%r6, -32(%r1)		// number of bytes to copy in phase 1
+	cmpldi	%r6, 0			// %r6 == 0? (if so, nothing to copy in phase 1)
+	beq+	%cr0, .Lphase2
+
+	mtctr	%r6
+	.align 5
+.Lphase1_loop:
+	lbz	%r6, 0(%r4)
+	add	%r4, %r4, %r0		// phase 1 increment
+	stb	%r6, 0(%r3)
+	add	%r3, %r3, %r0		// phase 1 increment
+
+	bdnz	.Lphase1_loop
+
+.Lphase2:
+	ld	%r6, -40(%r1)		// number of BLOCKS to copy in phase 2
+	cmpldi	%r6, 0			// %r6 == 0? (if so, nothing to copy in phase 2)
+	beq	%cr0, .Lphase3
+
+	// check for VSX support. Should be replaced by ifunc once it becomes available
+	ld	%r6, HIDENAME(powerpc64_has_vsx)@toc(%r2)
+	cmpdi	%r6, 0
+	bgt+	.Lphase2_vsx		// has VSX support
+	beq+	.Lphase2_no_vsx		// no VSX support
+
+	// the detection code was not run before. run it now
+
+	mfpvr	%r6			// load processor version register
+	srdi	%r6, %r6, 16		// we're only interested in the version
+
+	cmpdi	%r6, IBMPOWER7
+	beq	.Lphase2_vsx_check_has_vsx
+	cmpdi	%r6, IBMPOWER7PLUS
+	beq	.Lphase2_vsx_check_has_vsx
+	cmpdi	%r6, IBMPOWER8
+	beq	.Lphase2_vsx_check_has_vsx
+	cmpdi	%r6, IBMPOWER8E
+	beq	.Lphase2_vsx_check_has_vsx
+	cmpdi	%r6, IBMPOWER9
+	beq	.Lphase2_vsx_check_has_vsx
+
+	// no VSX support
+	li	%r6, 0
+	std	%r6, HIDENAME(powerpc64_has_vsx)@toc(%r2)
+	b	.Lphase2_no_vsx
+
+.Lphase2_vsx_check_has_vsx:
+	// VSX is supported
+	li	%r6, 1
+	std	%r6, HIDENAME(powerpc64_has_vsx)@toc(%r2)
+
+.Lphase2_vsx:
+	ld	%r6, -40(%r1)		// number of BLOCKS to copy in phase 2
+	mtctr	%r6
+	.align 5
+.Lphase2_vsx_loop:
+	LXVD2X(6, 7, 4)			// lxvd2x %vs6, %r7, %r4
+	LXVD2X(7, 8, 4)			// lxvd2x %vs7, %r8, %r4
+	LXVD2X(8, 9, 4)			// lxvd2x %vs8, %r9, %r4
+	LXVD2X(9, 10, 4)		// lxvd2x %vs9, %r10, %r4
+	STXVD2X(6, 7, 3)		// stxvd2x %vs6, %r7, %r3
+	STXVD2X(7, 8, 3)		// stxvd2x %vs7, %r8, %r3
+	STXVD2X(8, 9, 3)		// stxvd2x %vs8, %r9, %r3
+	STXVD2X(9, 10, 3)		// stxvd2x %vs9, %r10, %r3
+
+	add	%r4, %r4, %r5		// phase 2 increment
+	add	%r3, %r3, %r5		// phase 2 increment
+
+	// done using %r5. from now on we can reuse it freely
+
+	bdnz	.Lphase2_vsx_loop
+
+.Lphase3:
+	// load registers for transitioning into the single-phase logic
+	ld	%r5, -48(%r1)		// number of bytes to copy in phase 3
+	ld	%r8, -16(%r1)		// increment for single phase 16-byte
+	ld	%r9, -24(%r1)		// pre/post adjustment for single phase 16-byte
+	b	.Lsingle_phase
+
+.Lphase2_no_vsx:
+	// save registers
+	std	%r14, -56(%r1)
+	std	%r15, -64(%r1)
+	std	%r16, -72(%r1)
+	std	%r17, -80(%r1)
+	std	%r18, -88(%r1)
+	std	%r19, -96(%r1)
+	std	%r20, -104(%r1)
+	std	%r21, -112(%r1)
+
+	addi	%r18, %r7, 8
+	addi	%r19, %r8, 8
+	addi	%r20, %r9, 8
+	addi	%r21, %r10, 8
+
+	ld	%r6, -40(%r1)		// number of BLOCKS to copy in phase 2
+	mtctr	%r6
+	.align 5
+.Lphase2_no_vsx_loop:
+	ldx	%r14, %r7, %r4
+	ldx	%r15, %r18, %r4
+	ldx	%r16, %r8, %r4
+	ldx	%r17, %r19, %r4
+	stdx	%r14, %r7, %r3
+	stdx	%r15, %r18, %r3
+	stdx	%r16, %r8, %r3
+	stdx	%r17, %r19, %r3
+
+	ldx	%r14, %r9, %r4
+	ldx	%r15, %r20, %r4
+	ldx	%r16, %r10, %r4
+	ldx	%r17, %r21, %r4
+	stdx	%r14, %r9, %r3
+	stdx	%r15, %r20, %r3
+	stdx	%r16, %r10, %r3
+	stdx	%r17, %r21, %r3
+
+	add	%r4, %r4, %r5		// phase 2 increment
+	add	%r3, %r3, %r5		// phase 2 increment
+
+	// done using %r5. from now on we can reuse it freely
+
+	bdnz	.Lphase2_no_vsx_loop
+
+	// restore registers
+	ld	%r14, -56(%r1)
+	ld	%r15, -64(%r1)
+	ld	%r16, -72(%r1)
+	ld	%r17, -80(%r1)
+	ld	%r18, -88(%r1)
+	ld	%r19, -96(%r1)
+	ld	%r20, -104(%r1)
+	ld	%r21, -112(%r1)
+
+	// load registers for transitioning into the single-phase logic
+	ld	%r5, -48(%r1)		// number of bytes to copy in phase 3
+	ld	%r8, -16(%r1)		// increment for single phase 16-byte
+	ld	%r9, -24(%r1)		// pre/post adjustment for single phase 16-byte
+	b	.Lsingle_phase
+
+#ifdef MEMCOPY
+END(memcpy)
+#else
+#ifdef MEMMOVE
+END(memmove)
+#else
+END(bcopy)
+#endif
+#endif
+
+	.section .note.GNU-stack,"",%progbits
+
Index: lib/libc/powerpc64/string/memcpy.S
===================================================================
--- /dev/null
+++ lib/libc/powerpc64/string/memcpy.S
@@ -0,0 +1,5 @@
+/* $NetBSD: memcpy.S,v 1.1 2001/06/19 00:25:05 fvdl Exp $ */
+/* $FreeBSD$ */
+
+#define MEMCOPY
+#include "bcopy.S"
Index: lib/libc/powerpc64/string/memmove.S
===================================================================
--- /dev/null
+++ lib/libc/powerpc64/string/memmove.S
@@ -0,0 +1,5 @@
+/* $NetBSD: memmove.S,v 1.1 2001/06/19 00:25:05 fvdl Exp $ */
+/* $FreeBSD$ */
+
+#define MEMMOVE
+#include "bcopy.S"
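Reviewer note, not part of the patch: the copy strategy above is: buffers shorter than BCOPY_BLOCK_COPY_THRESHOLD (512) bytes take a single-phase path (16-byte chunks, then leftover bytes), while larger buffers are copied in three phases: a byte-wise prologue until the source reaches 16-byte alignment, a 64-byte block phase (VSX or GPR loads/stores, selected at run time), and a tail that reuses the single-phase path. The C sketch below models only the forward, non-overlapping case for illustration; copy_forward_model is a made-up name and the memcpy() calls stand in for the paired 8- or 16-byte loads/stores the assembler uses.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define BCOPY_ALIGNMENT_BYTES		16
#define BCOPY_ALIGNMENT_MASK		(BCOPY_ALIGNMENT_BYTES - 1)
#define BCOPY_BLOCK_SIZE		64
#define BCOPY_BLOCK_COPY_THRESHOLD	512

/* Forward direction only; the assembler also handles overlapping backward copies. */
static void
copy_forward_model(uint8_t *dst, const uint8_t *src, size_t len)
{
	size_t head, blocks, i;

	if (len < BCOPY_BLOCK_COPY_THRESHOLD) {
		/* single phase: 16-byte chunks, then leftover bytes */
		for (i = 0; i + 16 <= len; i += 16)
			memcpy(dst + i, src + i, 16);
		for (; i < len; i++)
			dst[i] = src[i];
		return;
	}

	/* phase 1: copy bytes until src reaches 16-byte alignment */
	head = (BCOPY_ALIGNMENT_BYTES -
	    ((uintptr_t)src & BCOPY_ALIGNMENT_MASK)) & BCOPY_ALIGNMENT_MASK;
	for (i = 0; i < head; i++)
		dst[i] = src[i];

	/* phase 2: 64-byte blocks (four 16-byte loads/stores per iteration) */
	blocks = (len - head) / BCOPY_BLOCK_SIZE;
	for (i = 0; i < blocks; i++)
		memcpy(dst + head + i * BCOPY_BLOCK_SIZE,
		    src + head + i * BCOPY_BLOCK_SIZE, BCOPY_BLOCK_SIZE);

	/* phase 3: the remaining tail goes back through the single-phase path */
	i = head + blocks * BCOPY_BLOCK_SIZE;
	copy_forward_model(dst + i, src + i, len - i);
}

int
main(void)
{
	static uint8_t src[1000], dst[1000];
	size_t i;

	for (i = 0; i < sizeof(src); i++)
		src[i] = (uint8_t)i;
	copy_forward_model(dst, src, sizeof(src));
	assert(memcmp(dst, src, sizeof(src)) == 0);
	return (0);
}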
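A second note on the LXVD2X()/STXVD2X() macros: they hand-assemble the VSX lxvd2x/stxvd2x instructions (XX1 form: primary opcode 31, extended opcode 844 or 972, with the high bit of the 6-bit VSR number placed in the low TX/SX bit) so the file still builds with assemblers that do not accept VSX mnemonics. The snippet below is only a cross-checking aid written for this review; xx1_form is a hypothetical helper that rebuilds the same .long values so they can be compared against the ISA manual or objdump output.

#include <stdint.h>
#include <stdio.h>

/* Rebuild an XX1-form word the same way the assembler macros do. */
static uint32_t
xx1_form(uint32_t xt, uint32_t ra, uint32_t rb, uint32_t xo)
{
	return ((31u << 26) | ((xt & 0x1f) << 21) | ((ra & 0x1f) << 16) |
	    ((rb & 0x1f) << 11) | (xo << 1) | ((xt & 0x20) >> 5));
}

int
main(void)
{
	/* lxvd2x %vs6, %r7, %r4 -- first load in .Lphase2_vsx_loop */
	printf("lxvd2x  vs6,r7,r4  -> 0x%08x\n", xx1_form(6, 7, 4, 844));
	/* stxvd2x %vs6, %r7, %r3 -- the matching store */
	printf("stxvd2x vs6,r7,r3  -> 0x%08x\n", xx1_form(6, 7, 3, 972));
	return (0);
}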