diff --git a/sys/arm/arm/bcopyinout.S b/sys/arm/arm/bcopyinout.S
index 2a9d48f989ea..dcdf8ed5af0c 100644
--- a/sys/arm/arm/bcopyinout.S
+++ b/sys/arm/arm/bcopyinout.S
@@ -1,129 +1,124 @@
 /*	$NetBSD: bcopyinout.S,v 1.11 2003/10/13 21:22:40 scw Exp $	*/
 
 /*-
  * Copyright (c) 2002 Wasabi Systems, Inc.
  * All rights reserved.
  *
  * Written by Allen Briggs for Wasabi Systems, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *      This product includes software developed for the NetBSD Project by
  *      Wasabi Systems, Inc.
  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
  *    or promote products derived from this software without specific prior
  *    written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 
 #include "assym.inc"
 
 #include <machine/asm.h>
 #include <sys/errno.h>
 
-.L_arm_memcpy:
-	.word	_C_LABEL(_arm_memcpy)
-.L_min_memcpy_size:
-	.word	_C_LABEL(_min_memcpy_size)
-
 __FBSDID("$FreeBSD$");
 #include <arm/arm/bcopyinout_xscale.S>
 
 /*
  * int badaddr_read_1(const uint8_t *src, uint8_t *dest)
  *
  * Copies a single 8-bit value from src to dest, returning 0 on success,
  * else EFAULT if a page fault occurred.
  */
 ENTRY(badaddr_read_1)
 	/* r0 = src, r1 = dest (see the prototype comment above). */
 	GET_PCB(r2)
 	ldr	r2, [r2]
 
 	/*
 	 * Save the current PCB_ONFAULT value in ip and point PCB_ONFAULT at
 	 * local label 1 for the duration of the byte load/store below.
 	 */
 	ldr	ip, [r2, #PCB_ONFAULT]
 	adr	r3, 1f
 	str	r3, [r2, #PCB_ONFAULT]
 	/*
 	 * NOTE(review): the nops presumably isolate the probing load so that
 	 * an abort is taken before the following access -- confirm.
 	 */
 	nop
 	nop
 	nop
 	ldrb	r3, [r0]
 	nop
 	nop
 	nop
 	strb	r3, [r1]
 	mov	r0, #0		/* No fault */
 	/*
 	 * Reached by fall-through with r0 = 0.  Presumably also entered via
 	 * PCB_ONFAULT by the abort handler with r0 already holding EFAULT --
 	 * confirm against the trap code.  Either way the saved PCB_ONFAULT
 	 * value is put back before returning.
 	 */
 1:	str	ip, [r2, #PCB_ONFAULT]
 	RET
 END(badaddr_read_1)
 
 /*
  * int badaddr_read_2(const uint16_t *src, uint16_t *dest)
  *
  * Copies a single 16-bit value from src to dest, returning 0 on success,
  * else EFAULT if a page fault occurred.
  */
 ENTRY(badaddr_read_2)
 	/* r0 = src, r1 = dest (see the prototype comment above). */
 	GET_PCB(r2)
 	ldr	r2, [r2]
 
 	/*
 	 * Save the current PCB_ONFAULT value in ip and point PCB_ONFAULT at
 	 * local label 1 for the duration of the halfword load/store below.
 	 */
 	ldr	ip, [r2, #PCB_ONFAULT]
 	adr	r3, 1f
 	str	r3, [r2, #PCB_ONFAULT]
 	/*
 	 * NOTE(review): the nops presumably isolate the probing load so that
 	 * an abort is taken before the following access -- confirm.
 	 */
 	nop
 	nop
 	nop
 	ldrh	r3, [r0]
 	nop
 	nop
 	nop
 	strh	r3, [r1]
 	mov	r0, #0		/* No fault */
 	/*
 	 * Reached by fall-through with r0 = 0.  Presumably also entered via
 	 * PCB_ONFAULT by the abort handler with r0 already holding EFAULT --
 	 * confirm against the trap code.  Either way the saved PCB_ONFAULT
 	 * value is put back before returning.
 	 */
 1:	str	ip, [r2, #PCB_ONFAULT]
 	RET
 END(badaddr_read_2)
 
 /*
  * int badaddr_read_4(const uint32_t *src, uint32_t *dest)
  *
  * Copies a single 32-bit value from src to dest, returning 0 on success,
  * else EFAULT if a page fault occurred.
  */
 ENTRY(badaddr_read_4)
 	/* r0 = src, r1 = dest (see the prototype comment above). */
 	GET_PCB(r2)
 	ldr	r2, [r2]
 
 	/*
 	 * Save the current PCB_ONFAULT value in ip and point PCB_ONFAULT at
 	 * local label 1 for the duration of the word load/store below.
 	 */
 	ldr	ip, [r2, #PCB_ONFAULT]
 	adr	r3, 1f
 	str	r3, [r2, #PCB_ONFAULT]
 	/*
 	 * NOTE(review): the nops presumably isolate the probing load so that
 	 * an abort is taken before the following access -- confirm.
 	 */
 	nop
 	nop
 	nop
 	ldr	r3, [r0]
 	nop
 	nop
 	nop
 	str	r3, [r1]
 	mov	r0, #0		/* No fault */
 	/*
 	 * Reached by fall-through with r0 = 0.  Presumably also entered via
 	 * PCB_ONFAULT by the abort handler with r0 already holding EFAULT --
 	 * confirm against the trap code.  Either way the saved PCB_ONFAULT
 	 * value is put back before returning.
 	 */
 1:	str	ip, [r2, #PCB_ONFAULT]
 	RET
 END(badaddr_read_4)
 
diff --git a/sys/arm/arm/bcopyinout_xscale.S b/sys/arm/arm/bcopyinout_xscale.S
index e592b901201f..4849ab9e794a 100644
--- a/sys/arm/arm/bcopyinout_xscale.S
+++ b/sys/arm/arm/bcopyinout_xscale.S
@@ -1,819 +1,775 @@
 /*	$NetBSD: bcopyinout_xscale.S,v 1.3 2003/12/15 09:27:18 scw Exp $	*/
 
 /*-
  * Copyright 2003 Wasabi Systems, Inc.
  * All rights reserved.
  *
  * Written by Steve C. Woodford for Wasabi Systems, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *      This product includes software developed for the NetBSD Project by
  *      Wasabi Systems, Inc.
  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
  *    or promote products derived from this software without specific prior
  *    written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <machine/asm.h>
 __FBSDID("$FreeBSD$");
 
 	.syntax	unified
 	.text
 	.align	2
 
 /*
  * GET_PCB(tmp): read CP15 register c13, c0, opcode2 4 into tmp and add the
  * TD_PCB offset to it.  The result is an address, not the PCB itself: every
  * user in this file follows the macro with "ldr tmp, [tmp]" to fetch the
  * pointer stored there.
  * NOTE(review): the CP15 register presumably holds the current thread
  * pointer, making the result &curthread->td_pcb -- confirm.
  */
 #define GET_PCB(tmp) \
 	mrc p15, 0, tmp, c13, c0, 4; \
 	add	tmp, tmp, #(TD_PCB)
 
 /*
  * r0 = user space address
  * r1 = kernel space address
  * r2 = length
  *
  * Copies bytes from user space to kernel space
  */
 ENTRY(copyin)
 	/*
 	 * Returns 0 in r0 on success, or EFAULT when r0 + r2 carries out of
 	 * 32 bits, when r0 + r2 is at or above VM_MAXUSER_ADDRESS + 1, or
 	 * when .Lcopyin_fault is entered.
 	 *
 	 * NOTE(review): the length test below uses the signed "le" condition,
 	 * so a length with bit 31 set also returns 0 without copying.
 	 */
 	cmp	r2, #0x00
 	movle	r0, #0x00
 	movle	pc, lr			/* Bail early if length is <= 0 */
 
 	/* r3 = one past the last user byte; carry set means it wrapped. */
 	adds	r3, r0, r2
 	movcs	r0, #EFAULT
 	RETc(cs)
 
 	/* Unsigned compare of the end address against the user limit. */
 	ldr	r12, =(VM_MAXUSER_ADDRESS + 1)
 	cmp	r3, r12
 	movcs	r0, #EFAULT
 	RETc(cs)
 
-	ldr	r3, .L_arm_memcpy
-	ldr	r3, [r3]
-	cmp	r3, #0
-	beq	.Lnormal
-	ldr	r3, .L_min_memcpy_size
-	ldr	r3, [r3]
-	cmp	r2, r3
-	blt	.Lnormal
-	stmfd	sp!, {r0-r2, r4, lr}
-	mov     r3, r0
-	mov     r0, r1
-	mov     r1, r3
-	mov     r3, #2 /* SRC_IS_USER */
-	ldr	r4, .L_arm_memcpy
-	mov	lr, pc
-	ldr	pc, [r4]
-	cmp     r0, #0
-	ldmfd   sp!, {r0-r2, r4, lr}
-	moveq	r0, #0
-	RETeq
-
-.Lnormal:
 	stmfd	sp!, {r10-r11, lr}
 
 	GET_PCB(r10)
 	ldr	r10, [r10]
 
 	/*
 	 * r3 tells .Lcopyin_fault which extra registers are on the stack:
 	 * 0 = none, > 0 = r4-r7, < 0 = r4-r9.  r11 keeps the previous
 	 * PCB_ONFAULT value so it can be restored on both exit paths.
 	 */
 	mov	r3, #0x00
 	adr	ip, .Lcopyin_fault
 	ldr	r11, [r10, #PCB_ONFAULT]
 	str	ip, [r10, #PCB_ONFAULT]
 	bl	.Lcopyin_guts
 	str	r11, [r10, #PCB_ONFAULT]
 	mov	r0, #0x00
 	ldmfd	sp!, {r10-r11, pc}
 
 	/*
 	 * Fault exit.  Presumably entered by the abort handler through
 	 * PCB_ONFAULT -- confirm against the trap code.  Restores the saved
 	 * PCB_ONFAULT value, unwinds whatever .Lcopyin_guts pushed (selected
 	 * by the sign of r3) and returns EFAULT to copyin's caller.
 	 */
 .Lcopyin_fault:
 	ldr	r0, =EFAULT
 	str	r11, [r10, #PCB_ONFAULT]
 	cmp	r3, #0x00
 	ldmfdgt	sp!, {r4-r7}		/* r3 > 0 Restore r4-r7 */
 	ldmfdlt	sp!, {r4-r9}		/* r3 < 0 Restore r4-r9 */
 	ldmfd	sp!, {r10-r11, pc}
 
 	/*
 	 * Copy loop proper: r0 = user source, r1 = kernel destination,
 	 * r2 = bytes remaining.  User memory is read with the "t" forms
 	 * (ldrt/ldrbt); kernel memory is written with plain str/strb/strd.
 	 */
 .Lcopyin_guts:
 	pld	[r0]
 	/* Word-align the destination buffer */
 	ands	ip, r1, #0x03		/* Already word aligned? */
 	beq	.Lcopyin_wordaligned	/* Yup */
 	rsb	ip, ip, #0x04
 	cmp	r2, ip			/* Enough bytes left to align it? */
 	blt	.Lcopyin_l4_2		/* Nope. Just copy bytewise */
 	sub	r2, r2, ip
 	/*
 	 * Computed jump: ip becomes 3 - (bytes needed), and each ldrbt/strb
 	 * pair below is 8 bytes, so "ip, lsl #3" skips that many pairs.  In
 	 * ARM state pc reads as this instruction + 8, i.e. the first ldrbt;
 	 * the nop fills the slot that is jumped over.
 	 */
 	rsbs	ip, ip, #0x03
 	addne	pc, pc, ip, lsl #3
 	nop
 	ldrbt	ip, [r0], #0x01
 	strb	ip, [r1], #0x01
 	ldrbt	ip, [r0], #0x01
 	strb	ip, [r1], #0x01
 	ldrbt	ip, [r0], #0x01
 	strb	ip, [r1], #0x01
 	cmp	r2, #0x00		/* All done? */
 	RETeq
 
 	/* Destination buffer is now word aligned */
 .Lcopyin_wordaligned:
 	ands	ip, r0, #0x03		/* Is src also word-aligned? */
 	bne	.Lcopyin_bad_align	/* Nope. Things just got bad */
 	cmp	r2, #0x08		/* Less than 8 bytes remaining? */
 	blt	.Lcopyin_w_less_than8
 
 	/* Quad-align the destination buffer */
 	tst	r1, #0x07		/* Already quad aligned? */
 	ldrtne	ip, [r0], #0x04
 	strne	ip, [r1], #0x04
 	subne	r2, r2, #0x04
 	stmfd	sp!, {r4-r9}		/* Free up some registers */
 	mov	r3, #-1			/* Signal restore r4-r9 */
 
 	/* Destination buffer quad aligned, source is word aligned */
 	subs	r2, r2, #0x80
 	blt	.Lcopyin_w_lessthan128
 
 	/* Copy 128 bytes at a time */
 .Lcopyin_w_loop128:
 	ldrt	r4, [r0], #0x04		/* LD:00-03 */
 	ldrt	r5, [r0], #0x04		/* LD:04-07 */
 	pld	[r0, #0x18]		/* Prefetch 0x20 */
 	ldrt	r6, [r0], #0x04		/* LD:08-0b */
 	ldrt	r7, [r0], #0x04		/* LD:0c-0f */
 	ldrt	r8, [r0], #0x04		/* LD:10-13 */
 	ldrt	r9, [r0], #0x04		/* LD:14-17 */
 	strd	r4, [r1], #0x08		/* ST:00-07 */
 	ldrt	r4, [r0], #0x04		/* LD:18-1b */
 	ldrt	r5, [r0], #0x04		/* LD:1c-1f */
 	strd	r6, [r1], #0x08		/* ST:08-0f */
 	ldrt	r6, [r0], #0x04		/* LD:20-23 */
 	ldrt	r7, [r0], #0x04		/* LD:24-27 */
 	pld	[r0, #0x18]		/* Prefetch 0x40 */
 	strd	r8, [r1], #0x08		/* ST:10-17 */
 	ldrt	r8, [r0], #0x04		/* LD:28-2b */
 	ldrt	r9, [r0], #0x04		/* LD:2c-2f */
 	strd	r4, [r1], #0x08		/* ST:18-1f */
 	ldrt	r4, [r0], #0x04		/* LD:30-33 */
 	ldrt	r5, [r0], #0x04		/* LD:34-37 */
 	strd	r6, [r1], #0x08		/* ST:20-27 */
 	ldrt	r6, [r0], #0x04		/* LD:38-3b */
 	ldrt	r7, [r0], #0x04		/* LD:3c-3f */
 	strd	r8, [r1], #0x08		/* ST:28-2f */
 	ldrt	r8, [r0], #0x04		/* LD:40-43 */
 	ldrt	r9, [r0], #0x04		/* LD:44-47 */
 	pld	[r0, #0x18]		/* Prefetch 0x60 */
 	strd	r4, [r1], #0x08		/* ST:30-37 */
 	ldrt	r4, [r0], #0x04		/* LD:48-4b */
 	ldrt	r5, [r0], #0x04		/* LD:4c-4f */
 	strd	r6, [r1], #0x08		/* ST:38-3f */
 	ldrt	r6, [r0], #0x04		/* LD:50-53 */
 	ldrt	r7, [r0], #0x04		/* LD:54-57 */
 	strd	r8, [r1], #0x08		/* ST:40-47 */
 	ldrt	r8, [r0], #0x04		/* LD:58-5b */
 	ldrt	r9, [r0], #0x04		/* LD:5c-5f */
 	strd	r4, [r1], #0x08		/* ST:48-4f */
 	ldrt	r4, [r0], #0x04		/* LD:60-63 */
 	ldrt	r5, [r0], #0x04		/* LD:64-67 */
 	pld	[r0, #0x18]		/* Prefetch 0x80 */
 	strd	r6, [r1], #0x08		/* ST:50-57 */
 	ldrt	r6, [r0], #0x04		/* LD:68-6b */
 	ldrt	r7, [r0], #0x04		/* LD:6c-6f */
 	strd	r8, [r1], #0x08		/* ST:58-5f */
 	ldrt	r8, [r0], #0x04		/* LD:70-73 */
 	ldrt	r9, [r0], #0x04		/* LD:74-77 */
 	strd	r4, [r1], #0x08		/* ST:60-67 */
 	ldrt	r4, [r0], #0x04		/* LD:78-7b */
 	ldrt	r5, [r0], #0x04		/* LD:7c-7f */
 	strd	r6, [r1], #0x08		/* ST:68-6f */
 	strd	r8, [r1], #0x08		/* ST:70-77 */
 	subs	r2, r2, #0x80
 	strd	r4, [r1], #0x08		/* ST:78-7f */
 	bge	.Lcopyin_w_loop128
 
 .Lcopyin_w_lessthan128:
 	adds	r2, r2, #0x80		/* Adjust for extra sub */
 	ldmfdeq	sp!, {r4-r9}
 	RETeq
 	subs	r2, r2, #0x20
 	blt	.Lcopyin_w_lessthan32
 
 	/* Copy 32 bytes at a time */
 .Lcopyin_w_loop32:
 	ldrt	r4, [r0], #0x04
 	ldrt	r5, [r0], #0x04
 	pld	[r0, #0x18]
 	ldrt	r6, [r0], #0x04
 	ldrt	r7, [r0], #0x04
 	ldrt	r8, [r0], #0x04
 	ldrt	r9, [r0], #0x04
 	strd	r4, [r1], #0x08
 	ldrt	r4, [r0], #0x04
 	ldrt	r5, [r0], #0x04
 	strd	r6, [r1], #0x08
 	strd	r8, [r1], #0x08
 	subs	r2, r2, #0x20
 	strd	r4, [r1], #0x08
 	bge	.Lcopyin_w_loop32
 
 .Lcopyin_w_lessthan32:
 	adds	r2, r2, #0x20		/* Adjust for extra sub */
 	ldmfdeq	sp!, {r4-r9}
 	RETeq				/* Return now if done */
 
 	/*
 	 * Computed jump into the three 8-byte copy groups below.  r4 is the
 	 * number of bytes to move in whole 8-byte units (0, 8, 16 or 24) and
 	 * r5 = 0x18 - r4.  Each group is four instructions (16 bytes; the nop
 	 * inside each group is padding), so "r5, lsl #1" skips r5 / 8 groups.
 	 * The flags from the subs survive the groups and the ldmfd and feed
 	 * the RETeq after them.
 	 */
 	and	r4, r2, #0x18
 	rsb	r5, r4, #0x18
 	subs	r2, r2, r4
 	add	pc, pc, r5, lsl #1
 	nop
 
 	/* At least 24 bytes remaining */
 	ldrt	r4, [r0], #0x04
 	ldrt	r5, [r0], #0x04
 	nop
 	strd	r4, [r1], #0x08
 
 	/* At least 16 bytes remaining */
 	ldrt	r4, [r0], #0x04
 	ldrt	r5, [r0], #0x04
 	nop
 	strd	r4, [r1], #0x08
 
 	/* At least 8 bytes remaining */
 	ldrt	r4, [r0], #0x04
 	ldrt	r5, [r0], #0x04
 	nop
 	strd	r4, [r1], #0x08
 
 	/* Less than 8 bytes remaining */
 	ldmfd	sp!, {r4-r9}
 	RETeq				/* Return now if done */
 	mov	r3, #0x00
 
 	/* Both pointers word aligned, 1-7 bytes left, nothing extra pushed. */
 .Lcopyin_w_less_than8:
 	subs	r2, r2, #0x04
 	ldrtge	ip, [r0], #0x04
 	strge	ip, [r1], #0x04
 	RETeq				/* Return now if done */
 	addlt	r2, r2, #0x04
 	ldrbt	ip, [r0], #0x01
 	cmp	r2, #0x02
 	ldrbtge	r2, [r0], #0x01
 	strb	ip, [r1], #0x01
 	ldrbtgt	ip, [r0]
 	strbge	r2, [r1], #0x01
 	strbgt	ip, [r1]
 	RET
 
 /*
  * At this point, it has not been possible to word align both buffers.
  * The destination buffer (r1) is word aligned, but the source buffer
  * (r0) is not.
  */
 .Lcopyin_bad_align:
 	stmfd	sp!, {r4-r7}
 	mov	r3, #0x01
 	/*
 	 * Round the source down to a word boundary and preload the first
 	 * word into ip; the original low two bits (still in ip for the cmp)
 	 * select the 1-, 2- or 3-byte shift variant.
 	 */
 	bic	r0, r0, #0x03
 	cmp	ip, #2
 	ldrt	ip, [r0], #0x04
 	bgt	.Lcopyin_bad3
 	beq	.Lcopyin_bad2
 	b	.Lcopyin_bad1
 
 .Lcopyin_bad1_loop16:
 	mov	r4, ip, lsr #8
 	ldrt	r5, [r0], #0x04
 	pld	[r0, #0x018]
 	ldrt	r6, [r0], #0x04
 	ldrt	r7, [r0], #0x04
 	ldrt	ip, [r0], #0x04
 	orr	r4, r4, r5, lsl #24
 	mov	r5, r5, lsr #8
 	orr	r5, r5, r6, lsl #24
 	mov	r6, r6, lsr #8
 	orr	r6, r6, r7, lsl #24
 	mov	r7, r7, lsr #8
 	orr	r7, r7, ip, lsl #24
 	str	r4, [r1], #0x04
 	str	r5, [r1], #0x04
 	str	r6, [r1], #0x04
 	str	r7, [r1], #0x04
 .Lcopyin_bad1:
 	subs	r2, r2, #0x10
 	bge	.Lcopyin_bad1_loop16
 
 	adds	r2, r2, #0x10
 	ldmfdeq	sp!, {r4-r7}
 	RETeq				/* Return now if done */
 	subs	r2, r2, #0x04
 	sublt	r0, r0, #0x03
 	blt	.Lcopyin_l4
 
 .Lcopyin_bad1_loop4:
 	mov	r4, ip, lsr #8
 	ldrt	ip, [r0], #0x04
 	subs	r2, r2, #0x04
 	orr	r4, r4, ip, lsl #24
 	str	r4, [r1], #0x04
 	bge	.Lcopyin_bad1_loop4
 	sub	r0, r0, #0x03
 	b	.Lcopyin_l4
 
 .Lcopyin_bad2_loop16:
 	mov	r4, ip, lsr #16
 	ldrt	r5, [r0], #0x04
 	pld	[r0, #0x018]
 	ldrt	r6, [r0], #0x04
 	ldrt	r7, [r0], #0x04
 	ldrt	ip, [r0], #0x04
 	orr	r4, r4, r5, lsl #16
 	mov	r5, r5, lsr #16
 	orr	r5, r5, r6, lsl #16
 	mov	r6, r6, lsr #16
 	orr	r6, r6, r7, lsl #16
 	mov	r7, r7, lsr #16
 	orr	r7, r7, ip, lsl #16
 	str	r4, [r1], #0x04
 	str	r5, [r1], #0x04
 	str	r6, [r1], #0x04
 	str	r7, [r1], #0x04
 .Lcopyin_bad2:
 	subs	r2, r2, #0x10
 	bge	.Lcopyin_bad2_loop16
 
 	adds	r2, r2, #0x10
 	ldmfdeq	sp!, {r4-r7}
 	RETeq				/* Return now if done */
 	subs	r2, r2, #0x04
 	sublt	r0, r0, #0x02
 	blt	.Lcopyin_l4
 
 .Lcopyin_bad2_loop4:
 	mov	r4, ip, lsr #16
 	ldrt	ip, [r0], #0x04
 	subs	r2, r2, #0x04
 	orr	r4, r4, ip, lsl #16
 	str	r4, [r1], #0x04
 	bge	.Lcopyin_bad2_loop4
 	sub	r0, r0, #0x02
 	b	.Lcopyin_l4
 
 .Lcopyin_bad3_loop16:
 	mov	r4, ip, lsr #24
 	ldrt	r5, [r0], #0x04
 	pld	[r0, #0x018]
 	ldrt	r6, [r0], #0x04
 	ldrt	r7, [r0], #0x04
 	ldrt	ip, [r0], #0x04
 	orr	r4, r4, r5, lsl #8
 	mov	r5, r5, lsr #24
 	orr	r5, r5, r6, lsl #8
 	mov	r6, r6, lsr #24
 	orr	r6, r6, r7, lsl #8
 	mov	r7, r7, lsr #24
 	orr	r7, r7, ip, lsl #8
 	str	r4, [r1], #0x04
 	str	r5, [r1], #0x04
 	str	r6, [r1], #0x04
 	str	r7, [r1], #0x04
 .Lcopyin_bad3:
 	subs	r2, r2, #0x10
 	bge	.Lcopyin_bad3_loop16
 
 	adds	r2, r2, #0x10
 	ldmfdeq	sp!, {r4-r7}
 	RETeq				/* Return now if done */
 	subs	r2, r2, #0x04
 	sublt	r0, r0, #0x01
 	blt	.Lcopyin_l4
 
 .Lcopyin_bad3_loop4:
 	mov	r4, ip, lsr #24
 	ldrt	ip, [r0], #0x04
 	subs	r2, r2, #0x04
 	orr	r4, r4, ip, lsl #8
 	str	r4, [r1], #0x04
 	bge	.Lcopyin_bad3_loop4
 	sub	r0, r0, #0x01
 
 	/*
 	 * Tail of the misaligned-source paths: r4-r7 are popped and r3 is
 	 * reset to 0 to match, then up to three trailing bytes are copied.
 	 */
 .Lcopyin_l4:
 	ldmfd	sp!, {r4-r7}
 	mov	r3, #0x00
 	adds	r2, r2, #0x04
 	RETeq
 .Lcopyin_l4_2:
 	/*
 	 * Same computed-jump scheme as at the top of .Lcopyin_guts: skip
 	 * 3 - r2 of the 8-byte ldrbt/strb pairs below.
 	 */
 	rsbs	r2, r2, #0x03
 	addne	pc, pc, r2, lsl #3
 	nop
 	ldrbt	ip, [r0], #0x01
 	strb	ip, [r1], #0x01
 	ldrbt	ip, [r0], #0x01
 	strb	ip, [r1], #0x01
 	ldrbt	ip, [r0]
 	strb	ip, [r1]
 	RET
 END(copyin)
 
 /*
  * r0 = kernel space address
  * r1 = user space address
  * r2 = length
  *
  * Copies bytes from kernel space to user space
  */
 ENTRY(copyout)
 	/*
 	 * Returns 0 in r0 on success, or EFAULT when r1 + r2 carries out of
 	 * 32 bits, when r1 + r2 is at or above VM_MAXUSER_ADDRESS + 1, or
 	 * when .Lcopyout_fault is entered.
 	 *
 	 * NOTE(review): the length test below uses the signed "le" condition,
 	 * so a length with bit 31 set also returns 0 without copying.
 	 */
 	cmp	r2, #0x00
 	movle	r0, #0x00
 	movle	pc, lr			/* Bail early if length is <= 0 */
 
 	/* r3 = one past the last user byte; carry set means it wrapped. */
 	adds	r3, r1, r2
 	movcs	r0, #EFAULT
 	RETc(cs)
 
 	/* Unsigned compare of the end address against the user limit. */
 	ldr	r12, =(VM_MAXUSER_ADDRESS + 1)
 	cmp	r3, r12
 	movcs	r0, #EFAULT
 	RETc(cs)
 
-	ldr	r3, .L_arm_memcpy
-	ldr	r3, [r3]
-	cmp	r3, #0
-	beq	.Lnormale
-	ldr	r3, .L_min_memcpy_size
-	ldr	r3, [r3]
-	cmp	r2, r3
-	blt	.Lnormale
-	stmfd	sp!, {r0-r2, r4, lr}
-	mov     r3, r0
-	mov     r0, r1
-	mov     r1, r3
-	mov     r3, #1 /* DST_IS_USER */
-	ldr	r4, .L_arm_memcpy
-	mov	lr, pc
-	ldr	pc, [r4]
-	cmp     r0, #0
-	ldmfd   sp!, {r0-r2, r4, lr}
-	moveq	r0, #0
-	RETeq
-
-.Lnormale:
 	stmfd	sp!, {r10-r11, lr}
 
 	GET_PCB(r10)
 	ldr	r10, [r10]
 
 	/*
 	 * r3 tells .Lcopyout_fault which extra registers are on the stack:
 	 * 0 = none, > 0 = r4-r7, < 0 = r4-r9.  r11 keeps the previous
 	 * PCB_ONFAULT value so it can be restored on both exit paths.
 	 */
 	mov	r3, #0x00
 	adr	ip, .Lcopyout_fault
 	ldr	r11, [r10, #PCB_ONFAULT]
 	str	ip, [r10, #PCB_ONFAULT]
 	bl	.Lcopyout_guts
 	str	r11, [r10, #PCB_ONFAULT]
 	mov	r0, #0x00
 	ldmfd	sp!, {r10-r11, pc}
 
 	/*
 	 * Fault exit.  Presumably entered by the abort handler through
 	 * PCB_ONFAULT -- confirm against the trap code.  Restores the saved
 	 * PCB_ONFAULT value, unwinds whatever .Lcopyout_guts pushed (selected
 	 * by the sign of r3) and returns EFAULT to copyout's caller.
 	 */
 .Lcopyout_fault:
 	ldr	r0, =EFAULT
 	str	r11, [r10, #PCB_ONFAULT]
 	cmp	r3, #0x00
 	ldmfdgt	sp!, {r4-r7}		/* r3 > 0 Restore r4-r7 */
 	ldmfdlt	sp!, {r4-r9}		/* r3 < 0 Restore r4-r9 */
 	ldmfd	sp!, {r10-r11, pc}
 
 	/*
 	 * Copy loop proper: r0 = kernel source, r1 = user destination,
 	 * r2 = bytes remaining.  Kernel memory is read with plain
 	 * ldr/ldrb/ldrd; user memory is written with the "t" forms
 	 * (strt/strbt).
 	 */
 .Lcopyout_guts:
 	pld	[r0]
 	/* Word-align the destination buffer */
 	ands	ip, r1, #0x03		/* Already word aligned? */
 	beq	.Lcopyout_wordaligned	/* Yup */
 	rsb	ip, ip, #0x04
 	cmp	r2, ip			/* Enough bytes left to align it? */
 	blt	.Lcopyout_l4_2		/* Nope. Just copy bytewise */
 	sub	r2, r2, ip
 	/*
 	 * Computed jump: ip becomes 3 - (bytes needed), and each ldrb/strbt
 	 * pair below is 8 bytes, so "ip, lsl #3" skips that many pairs.  In
 	 * ARM state pc reads as this instruction + 8, i.e. the first ldrb;
 	 * the nop fills the slot that is jumped over.
 	 */
 	rsbs	ip, ip, #0x03
 	addne	pc, pc, ip, lsl #3
 	nop
 	ldrb	ip, [r0], #0x01
 	strbt	ip, [r1], #0x01
 	ldrb	ip, [r0], #0x01
 	strbt	ip, [r1], #0x01
 	ldrb	ip, [r0], #0x01
 	strbt	ip, [r1], #0x01
 	cmp	r2, #0x00		/* All done? */
 	RETeq
 
 	/* Destination buffer is now word aligned */
 .Lcopyout_wordaligned:
 	ands	ip, r0, #0x03		/* Is src also word-aligned? */
 	bne	.Lcopyout_bad_align	/* Nope. Things just got bad */
 	cmp	r2, #0x08		/* Less than 8 bytes remaining? */
 	blt	.Lcopyout_w_less_than8
 
 	/* Quad-align the source buffer (r0 feeds the ldrd loads below) */
 	tst	r0, #0x07		/* Already quad aligned? */
 	ldrne	ip, [r0], #0x04
 	subne	r2, r2, #0x04
 	strtne	ip, [r1], #0x04
 
 	stmfd	sp!, {r4-r9}		/* Free up some registers */
 	mov	r3, #-1			/* Signal restore r4-r9 */
 
 	/* Destination buffer word aligned, source is quad aligned */
 	subs	r2, r2, #0x80
 	blt	.Lcopyout_w_lessthan128
 
 	/* Copy 128 bytes at a time */
 .Lcopyout_w_loop128:
 	ldrd	r4, [r0], #0x08		/* LD:00-07 */
 	pld	[r0, #0x18]		/* Prefetch 0x20 */
 	ldrd	r6, [r0], #0x08		/* LD:08-0f */
 	ldrd	r8, [r0], #0x08		/* LD:10-17 */
 	strt	r4, [r1], #0x04		/* ST:00-03 */
 	strt	r5, [r1], #0x04		/* ST:04-07 */
 	ldrd	r4, [r0], #0x08		/* LD:18-1f */
 	strt	r6, [r1], #0x04		/* ST:08-0b */
 	strt	r7, [r1], #0x04		/* ST:0c-0f */
 	ldrd	r6, [r0], #0x08		/* LD:20-27 */
 	pld	[r0, #0x18]		/* Prefetch 0x40 */
 	strt	r8, [r1], #0x04		/* ST:10-13 */
 	strt	r9, [r1], #0x04		/* ST:14-17 */
 	ldrd	r8, [r0], #0x08		/* LD:28-2f */
 	strt	r4, [r1], #0x04		/* ST:18-1b */
 	strt	r5, [r1], #0x04		/* ST:1c-1f */
 	ldrd	r4, [r0], #0x08		/* LD:30-37 */
 	strt	r6, [r1], #0x04		/* ST:20-23 */
 	strt	r7, [r1], #0x04		/* ST:24-27 */
 	ldrd	r6, [r0], #0x08		/* LD:38-3f */
 	strt	r8, [r1], #0x04		/* ST:28-2b */
 	strt	r9, [r1], #0x04		/* ST:2c-2f */
 	ldrd	r8, [r0], #0x08		/* LD:40-47 */
 	pld	[r0, #0x18]		/* Prefetch 0x60 */
 	strt	r4, [r1], #0x04		/* ST:30-33 */
 	strt	r5, [r1], #0x04		/* ST:34-37 */
 	ldrd	r4, [r0], #0x08		/* LD:48-4f */
 	strt	r6, [r1], #0x04		/* ST:38-3b */
 	strt	r7, [r1], #0x04		/* ST:3c-3f */
 	ldrd	r6, [r0], #0x08		/* LD:50-57 */
 	strt	r8, [r1], #0x04		/* ST:40-43 */
 	strt	r9, [r1], #0x04		/* ST:44-47 */
 	ldrd	r8, [r0], #0x08		/* LD:58-5f */
 	strt	r4, [r1], #0x04		/* ST:48-4b */
 	strt	r5, [r1], #0x04		/* ST:4c-4f */
 	ldrd	r4, [r0], #0x08		/* LD:60-67 */
 	pld	[r0, #0x18]		/* Prefetch 0x80 */
 	strt	r6, [r1], #0x04		/* ST:50-53 */
 	strt	r7, [r1], #0x04		/* ST:54-57 */
 	ldrd	r6, [r0], #0x08		/* LD:68-6f */
 	strt	r8, [r1], #0x04		/* ST:58-5b */
 	strt	r9, [r1], #0x04		/* ST:5c-5f */
 	ldrd	r8, [r0], #0x08		/* LD:70-77 */
 	strt	r4, [r1], #0x04		/* ST:60-63 */
 	strt	r5, [r1], #0x04		/* ST:64-67 */
 	ldrd	r4, [r0], #0x08		/* LD:78-7f */
 	strt	r6, [r1], #0x04		/* ST:68-6b */
 	strt	r7, [r1], #0x04		/* ST:6c-6f */
 	strt	r8, [r1], #0x04		/* ST:70-73 */
 	strt	r9, [r1], #0x04		/* ST:74-77 */
 	subs	r2, r2, #0x80
 	strt	r4, [r1], #0x04		/* ST:78-7b */
 	strt	r5, [r1], #0x04		/* ST:7c-7f */
 	bge	.Lcopyout_w_loop128
 
 .Lcopyout_w_lessthan128:
 	adds	r2, r2, #0x80		/* Adjust for extra sub */
 	ldmfdeq	sp!, {r4-r9}
 	RETeq				/* Return now if done */
 	subs	r2, r2, #0x20
 	blt	.Lcopyout_w_lessthan32
 
 	/* Copy 32 bytes at a time */
 .Lcopyout_w_loop32:
 	ldrd	r4, [r0], #0x08
 	pld	[r0, #0x18]
 	ldrd	r6, [r0], #0x08
 	ldrd	r8, [r0], #0x08
 	strt	r4, [r1], #0x04
 	strt	r5, [r1], #0x04
 	ldrd	r4, [r0], #0x08
 	strt	r6, [r1], #0x04
 	strt	r7, [r1], #0x04
 	strt	r8, [r1], #0x04
 	strt	r9, [r1], #0x04
 	subs	r2, r2, #0x20
 	strt	r4, [r1], #0x04
 	strt	r5, [r1], #0x04
 	bge	.Lcopyout_w_loop32
 
 .Lcopyout_w_lessthan32:
 	adds	r2, r2, #0x20		/* Adjust for extra sub */
 	ldmfdeq	sp!, {r4-r9}
 	RETeq				/* Return now if done */
 
 	/*
 	 * Computed jump into the three 8-byte copy groups below.  r4 is the
 	 * number of bytes to move in whole 8-byte units (0, 8, 16 or 24) and
 	 * r5 = 0x18 - r4.  Each group is four instructions (16 bytes; the
 	 * trailing nop of each group is padding), so "r5, lsl #1" skips
 	 * r5 / 8 groups.  The flags from the subs survive the groups and the
 	 * ldmfd and feed the RETeq after them.
 	 */
 	and	r4, r2, #0x18
 	rsb	r5, r4, #0x18
 	subs	r2, r2, r4
 	add	pc, pc, r5, lsl #1
 	nop
 
 	/* At least 24 bytes remaining */
 	ldrd	r4, [r0], #0x08
 	strt	r4, [r1], #0x04
 	strt	r5, [r1], #0x04
 	nop
 
 	/* At least 16 bytes remaining */
 	ldrd	r4, [r0], #0x08
 	strt	r4, [r1], #0x04
 	strt	r5, [r1], #0x04
 	nop
 
 	/* At least 8 bytes remaining */
 	ldrd	r4, [r0], #0x08
 	strt	r4, [r1], #0x04
 	strt	r5, [r1], #0x04
 	nop
 
 	/* Less than 8 bytes remaining */
 	ldmfd	sp!, {r4-r9}
 	RETeq				/* Return now if done */
 	mov	r3, #0x00
 
 	/* Both pointers word aligned, 1-7 bytes left, nothing extra pushed. */
 .Lcopyout_w_less_than8:
 	subs	r2, r2, #0x04
 	ldrge	ip, [r0], #0x04
 	strtge	ip, [r1], #0x04
 	RETeq				/* Return now if done */
 	addlt	r2, r2, #0x04
 	ldrb	ip, [r0], #0x01
 	cmp	r2, #0x02
 	ldrbge	r2, [r0], #0x01
 	strbt	ip, [r1], #0x01
 	ldrbgt	ip, [r0]
 	strbtge	r2, [r1], #0x01
 	strbtgt	ip, [r1]
 	RET
 
 /*
  * At this point, it has not been possible to word align both buffers.
  * The destination buffer (r1) is word aligned, but the source buffer
  * (r0) is not.
  */
 .Lcopyout_bad_align:
 	stmfd	sp!, {r4-r7}
 	mov	r3, #0x01
 	/*
 	 * Round the source down to a word boundary and preload the first
 	 * word into ip; the original low two bits (still in ip for the cmp)
 	 * select the 1-, 2- or 3-byte shift variant.
 	 */
 	bic	r0, r0, #0x03
 	cmp	ip, #2
 	ldr	ip, [r0], #0x04
 	bgt	.Lcopyout_bad3
 	beq	.Lcopyout_bad2
 	b	.Lcopyout_bad1
 
 .Lcopyout_bad1_loop16:
 	mov	r4, ip, lsr #8
 	ldr	r5, [r0], #0x04
 	pld	[r0, #0x018]
 	ldr	r6, [r0], #0x04
 	ldr	r7, [r0], #0x04
 	ldr	ip, [r0], #0x04
 	orr	r4, r4, r5, lsl #24
 	mov	r5, r5, lsr #8
 	orr	r5, r5, r6, lsl #24
 	mov	r6, r6, lsr #8
 	orr	r6, r6, r7, lsl #24
 	mov	r7, r7, lsr #8
 	orr	r7, r7, ip, lsl #24
 	strt	r4, [r1], #0x04
 	strt	r5, [r1], #0x04
 	strt	r6, [r1], #0x04
 	strt	r7, [r1], #0x04
 .Lcopyout_bad1:
 	subs	r2, r2, #0x10
 	bge	.Lcopyout_bad1_loop16
 
 	adds	r2, r2, #0x10
 	ldmfdeq	sp!, {r4-r7}
 	RETeq				/* Return now if done */
 	subs	r2, r2, #0x04
 	sublt	r0, r0, #0x03
 	blt	.Lcopyout_l4
 
 .Lcopyout_bad1_loop4:
 	mov	r4, ip, lsr #8
 	ldr	ip, [r0], #0x04
 	subs	r2, r2, #0x04
 	orr	r4, r4, ip, lsl #24
 	strt	r4, [r1], #0x04
 	bge	.Lcopyout_bad1_loop4
 	sub	r0, r0, #0x03
 	b	.Lcopyout_l4
 
 .Lcopyout_bad2_loop16:
 	mov	r4, ip, lsr #16
 	ldr	r5, [r0], #0x04
 	pld	[r0, #0x018]
 	ldr	r6, [r0], #0x04
 	ldr	r7, [r0], #0x04
 	ldr	ip, [r0], #0x04
 	orr	r4, r4, r5, lsl #16
 	mov	r5, r5, lsr #16
 	orr	r5, r5, r6, lsl #16
 	mov	r6, r6, lsr #16
 	orr	r6, r6, r7, lsl #16
 	mov	r7, r7, lsr #16
 	orr	r7, r7, ip, lsl #16
 	strt	r4, [r1], #0x04
 	strt	r5, [r1], #0x04
 	strt	r6, [r1], #0x04
 	strt	r7, [r1], #0x04
 .Lcopyout_bad2:
 	subs	r2, r2, #0x10
 	bge	.Lcopyout_bad2_loop16
 
 	adds	r2, r2, #0x10
 	ldmfdeq	sp!, {r4-r7}
 	RETeq				/* Return now if done */
 	subs	r2, r2, #0x04
 	sublt	r0, r0, #0x02
 	blt	.Lcopyout_l4
 
 .Lcopyout_bad2_loop4:
 	mov	r4, ip, lsr #16
 	ldr	ip, [r0], #0x04
 	subs	r2, r2, #0x04
 	orr	r4, r4, ip, lsl #16
 	strt	r4, [r1], #0x04
 	bge	.Lcopyout_bad2_loop4
 	sub	r0, r0, #0x02
 	b	.Lcopyout_l4
 
 .Lcopyout_bad3_loop16:
 	mov	r4, ip, lsr #24
 	ldr	r5, [r0], #0x04
 	pld	[r0, #0x018]
 	ldr	r6, [r0], #0x04
 	ldr	r7, [r0], #0x04
 	ldr	ip, [r0], #0x04
 	orr	r4, r4, r5, lsl #8
 	mov	r5, r5, lsr #24
 	orr	r5, r5, r6, lsl #8
 	mov	r6, r6, lsr #24
 	orr	r6, r6, r7, lsl #8
 	mov	r7, r7, lsr #24
 	orr	r7, r7, ip, lsl #8
 	strt	r4, [r1], #0x04
 	strt	r5, [r1], #0x04
 	strt	r6, [r1], #0x04
 	strt	r7, [r1], #0x04
 .Lcopyout_bad3:
 	subs	r2, r2, #0x10
 	bge	.Lcopyout_bad3_loop16
 
 	adds	r2, r2, #0x10
 	ldmfdeq	sp!, {r4-r7}
 	RETeq				/* Return now if done */
 	subs	r2, r2, #0x04
 	sublt	r0, r0, #0x01
 	blt	.Lcopyout_l4
 
 .Lcopyout_bad3_loop4:
 	mov	r4, ip, lsr #24
 	ldr	ip, [r0], #0x04
 	subs	r2, r2, #0x04
 	orr	r4, r4, ip, lsl #8
 	strt	r4, [r1], #0x04
 	bge	.Lcopyout_bad3_loop4
 	sub	r0, r0, #0x01
 
 	/*
 	 * Tail of the misaligned-source paths: r4-r7 are popped and r3 is
 	 * reset to 0 to match, then up to three trailing bytes are copied.
 	 */
 .Lcopyout_l4:
 	ldmfd	sp!, {r4-r7}
 	mov	r3, #0x00
 	adds	r2, r2, #0x04
 	RETeq
 .Lcopyout_l4_2:
 	/*
 	 * Same computed-jump scheme as at the top of .Lcopyout_guts: skip
 	 * 3 - r2 of the 8-byte ldrb/strbt pairs below.
 	 */
 	rsbs	r2, r2, #0x03
 	addne	pc, pc, r2, lsl #3
 	nop
 	ldrb	ip, [r0], #0x01
 	strbt	ip, [r1], #0x01
 	ldrb	ip, [r0], #0x01
 	strbt	ip, [r1], #0x01
 	ldrb	ip, [r0]
 	strbt	ip, [r1]
 	RET
 END(copyout)
 
diff --git a/sys/arm/arm/machdep.c b/sys/arm/arm/machdep.c
index 3d966527599d..4f26c6e9ebf9 100644
--- a/sys/arm/arm/machdep.c
+++ b/sys/arm/arm/machdep.c
@@ -1,958 +1,953 @@
 /*	$NetBSD: arm32_machdep.c,v 1.44 2004/03/24 15:34:47 atatat Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
  *
  * Copyright (c) 2004 Olivier Houchard
  * Copyright (c) 1994-1998 Mark Brinicombe.
  * Copyright (c) 1994 Brini.
  * All rights reserved.
  *
  * This code is derived from software written for Brini by Mark Brinicombe
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by Mark Brinicombe
  *	for the NetBSD Project.
  * 4. The name of the company nor the name of the author may be used to
  *    endorse or promote products derived from this software without specific
  *    prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * Machine dependent functions for kernel setup
  *
  * Created      : 17/09/94
  * Updated	: 18/04/01 updated for new wscons
  */
 
 #include "opt_ddb.h"
 #include "opt_kstack_pages.h"
 #include "opt_platform.h"
 #include "opt_sched.h"
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/buf.h>
 #include <sys/bus.h>
 #include <sys/cons.h>
 #include <sys/cpu.h>
 #include <sys/devmap.h>
 #include <sys/efi.h>
 #include <sys/imgact.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/linker.h>
 #include <sys/msgbuf.h>
 #include <sys/physmem.h>
 #include <sys/reboot.h>
 #include <sys/rwlock.h>
 #include <sys/sched.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 #include <sys/vmmeter.h>
 
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pager.h>
 
 #include <machine/asm.h>
 #include <machine/debug_monitor.h>
 #include <machine/machdep.h>
 #include <machine/metadata.h>
 #include <machine/pcb.h>
 #include <machine/platform.h>
 #include <machine/sysarch.h>
 #include <machine/undefined.h>
 #include <machine/vfp.h>
 #include <machine/vmparam.h>
 
 #ifdef FDT
 #include <dev/fdt/fdt_common.h>
 #include <machine/ofw_machdep.h>
 #endif
 
 #ifdef DEBUG
 #define	debugf(fmt, args...) printf(fmt, ##args)
 #else
 #define	debugf(fmt, args...)
 #endif
 
 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
     defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) || \
     defined(COMPAT_FREEBSD9)
 #error FreeBSD/arm doesn't provide compatibility with releases prior to 10
 #endif
 
 
 #if __ARM_ARCH < 6
 #error FreeBSD requires ARMv6 or later
 #endif
 
 struct pcpu __pcpu[MAXCPU];
 struct pcpu *pcpup = &__pcpu[0];
 
 static struct trapframe proc0_tf;
 uint32_t cpu_reset_address = 0;
 int cold = 1;
 vm_offset_t vector_page;
 
 /* The address at which the kernel was loaded.  Set early in initarm(). */
 vm_paddr_t arm_physmem_kernaddr;
 
-int (*_arm_memcpy)(void *, void *, int, int) = NULL;
-int (*_arm_bzero)(void *, int, int) = NULL;
-int _min_memcpy_size = 0;
-int _min_bzero_size = 0;
-
 extern int *end;
 
 #ifdef FDT
 vm_paddr_t pmap_pa;
 vm_offset_t systempage;
 vm_offset_t irqstack;
 vm_offset_t undstack;
 vm_offset_t abtstack;
 #endif /* FDT */
 
 #ifdef PLATFORM
 static delay_func *delay_impl;
 static void *delay_arg;
 #endif
 
 struct kva_md_info kmi;
 /*
  * arm_vector_init:
  *
  *	Initialize the vector page, and select whether or not to
  *	relocate the vectors.
  *
  *	NOTE: We expect the vector page to be mapped at its expected
  *	destination.
  */
 
 extern unsigned int page0[], page0_data[];
 void
 arm_vector_init(vm_offset_t va, int which)
 {
 	/*
 	 * va is the address of the vector page; "which" is a bit mask with
 	 * bit N set for each vector N (0 .. ARM_NVEC - 1) to install.
 	 * The cast matches the declared pointer type (the previous
 	 * "(int *)" cast mixed pointer signedness).
 	 */
 	unsigned int *vectors = (unsigned int *)va;
 	/* The data words sit at the same offset from the vectors as in page0. */
 	unsigned int *vectors_data = vectors + (page0_data - page0);
 	int vec;
 
 	/*
 	 * Loop through the vectors we're taking over, and copy the
 	 * vector's insn and data word.
 	 */
 	for (vec = 0; vec < ARM_NVEC; vec++) {
 		if ((which & (1 << vec)) == 0) {
 			/* Don't want to take over this vector. */
 			continue;
 		}
 		vectors[vec] = page0[vec];
 		vectors_data[vec] = page0_data[vec];
 	}
 
 	/* Now sync the vectors. */
 	icache_sync(va, (ARM_NVEC * 2) * sizeof(u_int));
 
 	/* Record where the vector page lives. */
 	vector_page = va;
 }
 
 static void
 cpu_startup(void *dummy)
 {
 	struct pcb *pcb = thread0.td_pcb;
 	const unsigned int mbyte = 1024 * 1024;
 
 	identify_arm_cpu();
 
 	vm_ksubmap_init(&kmi);
 
 	/*
 	 * Display the RAM layout.
 	 */
 	printf("real memory  = %ju (%ju MB)\n",
 	    (uintmax_t)arm32_ptob(realmem),
 	    (uintmax_t)arm32_ptob(realmem) / mbyte);
 	printf("avail memory = %ju (%ju MB)\n",
 	    (uintmax_t)arm32_ptob(vm_free_count()),
 	    (uintmax_t)arm32_ptob(vm_free_count()) / mbyte);
 	if (bootverbose) {
 		physmem_print_tables();
 		devmap_print_table();
 	}
 
 	bufinit();
 	vm_pager_bufferinit();
 	pcb->pcb_regs.sf_sp = (u_int)thread0.td_kstack +
 	    USPACE_SVC_STACK_TOP;
 	pmap_set_pcb_pagedir(kernel_pmap, pcb);
 }
 
 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
 
 /*
  * Flush the D-cache for non-DMA I/O so that the I-cache can
  * be made coherent later.
  */
 void
 cpu_flush_dcache(void *ptr, size_t len)
 {
 	/* dcache_wb_poc() takes both the virtual and the physical address. */
 	vm_offset_t va = (vm_offset_t)ptr;
 	vm_paddr_t pa = (vm_paddr_t)vtophys(ptr);
 
 	dcache_wb_poc(va, pa, len);
 }
 
 /* Get current clock frequency for the given cpu id. */
 int
 cpu_est_clockrate(int cpu_id, uint64_t *rate)
 {
 	struct pcpu *cpu;
 
 	/* Both an unknown CPU id and a missing output pointer are EINVAL. */
 	cpu = pcpu_find(cpu_id);
 	if (cpu == NULL)
 		return (EINVAL);
 	if (rate == NULL)
 		return (EINVAL);
 
 	/* A zero pc_clock is reported as "not supported". */
 	if (cpu->pc_clock != 0) {
 		*rate = cpu->pc_clock;
 		return (0);
 	}
 	return (EOPNOTSUPP);
 }
 
 void
 cpu_idle(int busy)
 {
 
 	CTR2(KTR_SPARE2, "cpu_idle(%d) at %d", busy, curcpu);
 
 	/*
 	 * The whole sequence runs inside a spinlock section.  The idle and
 	 * active clock hooks are only called when the caller passed busy == 0;
 	 * the CPU only sleeps when the scheduler reports nothing runnable.
 	 */
 	spinlock_enter();
 	if (busy == 0)
 		cpu_idleclock();
 	if (sched_runnable() == 0)
 		cpu_sleep(0);
 	if (busy == 0)
 		cpu_activeclock();
 	spinlock_exit();
 
 	CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done", busy, curcpu);
 }
 
 int
 cpu_idle_wakeup(int cpu)
 {
 
 	/* The cpu argument is not used; this implementation always returns 0. */
 	return (0);
 }
 
 void
 cpu_initclocks(void)
 {
 
 #ifdef SMP
 	/* Every CPU except cpuid 0 takes the AP path. */
 	if (PCPU_GET(cpuid) != 0) {
 		cpu_initclocks_ap();
 		return;
 	}
 #endif
 	/* cpuid 0, or any non-SMP build, takes the BSP path. */
 	cpu_initclocks_bsp();
 }
 
 #ifdef PLATFORM
 void
 arm_set_delay(delay_func *impl, void *arg)
 {
 
 	/*
 	 * Record the delay callback and its argument in the file-scope
 	 * delay_impl / delay_arg variables; DELAY() passes delay_arg back to
 	 * impl on every call.  impl must not be NULL.
 	 */
 	KASSERT(impl != NULL, ("No DELAY implementation"));
 	delay_impl = impl;
 	delay_arg = arg;
 }
 
 void
 DELAY(int usec)
 {
 
 	/*
 	 * Forward to the callback registered through arm_set_delay(),
 	 * bracketed by the TSENTER/TSEXIT markers.  delay_impl is called
 	 * without a NULL check here.
 	 */
 	TSENTER();
 	delay_impl(usec, delay_arg);
 	TSEXIT();
 }
 #endif
 
 /*
  * Machine-dependent per-CPU data initialisation.  Only pc_mpidr is
  * touched: it is set to 0xffffffff (presumably a "not yet known" marker;
  * pcpu0_init() later stores the real value for CPU 0).  The cpuid and
  * size arguments are not used.
  */
 void
 cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
 {
 
 	pcpu->pc_mpidr = 0xffffffff;
 }
 
 /*
  * Enter a spinlock section for the current thread.  Only the outermost
  * entry disables IRQ and FIQ, records the previous interrupt state in
  * the thread and enters a critical section; nested entries just bump
  * the nesting counter.
  */
 void
 spinlock_enter(void)
 {
 	struct thread *td;
 	register_t cspr;
 
 	td = curthread;
 	if (td->td_md.md_spinlock_count != 0) {
 		/* Nested entry: interrupts are already off. */
 		td->td_md.md_spinlock_count++;
 		return;
 	}
 
 	cspr = disable_interrupts(PSR_I | PSR_F);
 	td->td_md.md_spinlock_count = 1;
 	td->td_md.md_saved_cspr = cspr;
 	critical_enter();
 }
 
 /*
  * Leave a spinlock section.  When the outermost level is released the
  * critical section is exited and the interrupt state saved by
  * spinlock_enter() is restored.
  */
 void
 spinlock_exit(void)
 {
 	struct thread *td;
 	register_t saved;
 
 	td = curthread;
 	/* Fetch the saved state before the counter is dropped. */
 	saved = td->td_md.md_saved_cspr;
 	td->td_md.md_spinlock_count--;
 	if (td->td_md.md_spinlock_count != 0)
 		return;
 
 	critical_exit();
 	restore_interrupts(saved);
 }
 
 /*
  * Clear registers on exec
  *
  * The whole trapframe is zeroed, then the user stack pointer is set to
  * 'stack', and both the user link register and the pc are set to the
  * image entry point.  The saved status register selects 32-bit user mode.
  */
 void
 exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack)
 {
 	struct trapframe *tf = td->td_frame;
 
 	memset(tf, 0, sizeof(*tf));
 	tf->tf_usr_sp = stack;
 	tf->tf_usr_lr = imgp->entry_addr;
 	/* NOTE(review): 0x77777777 looks like a recognisable filler value -- confirm. */
 	tf->tf_svc_lr = 0x77777777;
 	tf->tf_pc = imgp->entry_addr;
 	tf->tf_spsr = PSR_USR32_MODE;
 }
 
 #ifdef VFP
 /*
  * Get machine VFP context.
  *
  * Copies the VFP registers and FPSCR saved in td's PCB into *vfp.  Only
  * mcv_reg and mcv_fpscr are written; any other bytes of *vfp are left
  * untouched, so callers that copy *vfp out must initialise it first.
  */
 void
 get_vfpcontext(struct thread *td, mcontext_vfp_t *vfp)
 {
 	struct pcb *pcb;
 
 	pcb = td->td_pcb;
 	if (td == curthread) {
 		/* Bring the PCB copy up to date before reading it. */
 		critical_enter();
 		vfp_store(&pcb->pcb_vfpstate, false);
 		critical_exit();
 	} else
 		/* Another thread's state is only read while it is suspended. */
 		MPASS(TD_IS_SUSPENDED(td));
 	memcpy(vfp->mcv_reg, pcb->pcb_vfpstate.reg,
 	    sizeof(vfp->mcv_reg));
 	vfp->mcv_fpscr = pcb->pcb_vfpstate.fpscr;
 }
 
 /*
  * Set machine VFP context.
  *
  * Overwrites the VFP registers and FPSCR saved in td's PCB with the
  * values from *vfp.
  */
 void
 set_vfpcontext(struct thread *td, mcontext_vfp_t *vfp)
 {
 	struct pcb *pcb;
 
 	pcb = td->td_pcb;
 	if (td == curthread) {
 		/* vfp_discard() runs before the PCB copy is replaced below. */
 		critical_enter();
 		vfp_discard(td);
 		critical_exit();
 	} else
 		/* Another thread's state is only written while it is suspended. */
 		MPASS(TD_IS_SUSPENDED(td));
 	memcpy(pcb->pcb_vfpstate.reg, vfp->mcv_reg,
 	    sizeof(pcb->pcb_vfpstate.reg));
 	pcb->pcb_vfpstate.fpscr = vfp->mcv_fpscr;
 }
 #endif
 
 /*
  * Copy the VFP context of 'td' out to userland.
  *
  * args: user pointer to a struct arm_get_vfpstate_args giving the
  *       destination buffer (mc_vfp) and its size (mc_vfp_size).
  *
  * Returns the copyin()/copyout() error if either fails, EINVAL when
  * mc_vfp_size does not match sizeof(mcontext_vfp_t), and 0 on success.
  */
 int
 arm_get_vfpstate(struct thread *td, void *args)
 {
 	int rv;
 	struct arm_get_vfpstate_args ua;
 	mcontext_vfp_t	mcontext_vfp;
 
 	rv = copyin(args, &ua, sizeof(ua));
 	if (rv != 0)
 		return (rv);
 	if (ua.mc_vfp_size != sizeof(mcontext_vfp_t))
 		return (EINVAL);
 
 	/*
 	 * Always start from a zeroed structure.  get_vfpcontext() fills in
 	 * only mcv_reg and mcv_fpscr, while the whole structure is copied
 	 * out below; without this, any padding or additional members would
 	 * carry stale kernel stack bytes to userland.
 	 */
 	bzero(&mcontext_vfp, sizeof(mcontext_vfp));
 #ifdef VFP
 	get_vfpcontext(td, &mcontext_vfp);
 #endif
 
 	return (copyout(&mcontext_vfp, ua.mc_vfp, sizeof(mcontext_vfp)));
 }
 
 /*
  * Get machine context.
  *
  * Copies the general-purpose registers from td's trapframe into
  * mcp->__gregs.  When GET_MC_CLEAR_RET is set in clear_ret, r0 is
  * reported as 0 and the carry flag (PSR_C) is cleared in the reported
  * CPSR.  The VFP fields are reported as empty (size 0, NULL pointer)
  * and mc_spare is zeroed.  Always returns 0.
  */
 int
 get_mcontext(struct thread *td, mcontext_t *mcp, int clear_ret)
 {
 	struct trapframe *tf = td->td_frame;
 	__greg_t *gr = mcp->__gregs;
 
 	if (clear_ret & GET_MC_CLEAR_RET) {
 		gr[_REG_R0] = 0;
 		gr[_REG_CPSR] = tf->tf_spsr & ~PSR_C;
 	} else {
 		gr[_REG_R0]   = tf->tf_r0;
 		gr[_REG_CPSR] = tf->tf_spsr;
 	}
 	gr[_REG_R1]   = tf->tf_r1;
 	gr[_REG_R2]   = tf->tf_r2;
 	gr[_REG_R3]   = tf->tf_r3;
 	gr[_REG_R4]   = tf->tf_r4;
 	gr[_REG_R5]   = tf->tf_r5;
 	gr[_REG_R6]   = tf->tf_r6;
 	gr[_REG_R7]   = tf->tf_r7;
 	gr[_REG_R8]   = tf->tf_r8;
 	gr[_REG_R9]   = tf->tf_r9;
 	gr[_REG_R10]  = tf->tf_r10;
 	gr[_REG_R11]  = tf->tf_r11;
 	gr[_REG_R12]  = tf->tf_r12;
 	gr[_REG_SP]   = tf->tf_usr_sp;
 	gr[_REG_LR]   = tf->tf_usr_lr;
 	gr[_REG_PC]   = tf->tf_pc;
 
 	/* No VFP state is attached here; callers fill these in if needed. */
 	mcp->mc_vfp_size = 0;
 	mcp->mc_vfp_ptr = NULL;
 	memset(&mcp->mc_spare, 0, sizeof(mcp->mc_spare));
 
 	return (0);
 }
 
 /*
  * Set machine context.
  *
  * Loads the general-purpose registers and CPSR of td's trapframe from
  * mcp->__gregs.  The supplied CPSR is accepted only if it selects 32-bit
  * user mode and leaves IRQ and FIQ enabled; otherwise EINVAL is returned
  * and nothing is changed.
  *
  * If mcp describes a VFP context of the expected size it is copied in
  * from userland (EFAULT on failure) before any register is modified and,
  * on VFP kernels, installed after the integer registers.
  *
  * Returns 0 on success.
  */
 int
 set_mcontext(struct thread *td, mcontext_t *mcp)
 {
 	mcontext_vfp_t mc_vfp, *vfp;
 	struct trapframe *tf = td->td_frame;
 	const __greg_t *gr = mcp->__gregs;
 	int spsr;
 
 	/*
 	 * Make sure the processor mode has not been tampered with and
 	 * interrupts have not been disabled.
 	 */
 	spsr = gr[_REG_CPSR];
 	if ((spsr & PSR_MODE) != PSR_USR32_MODE ||
 	    (spsr & (PSR_I | PSR_F)) != 0)
 		return (EINVAL);
 
 #ifdef WITNESS
 	/* Diagnostics only: a malformed VFP descriptor is reported, not rejected. */
 	if (mcp->mc_vfp_size != 0 && mcp->mc_vfp_size != sizeof(mc_vfp)) {
 		printf("%s: %s: Malformed mc_vfp_size: %d (0x%08X)\n",
 		    td->td_proc->p_comm, __func__,
 		    mcp->mc_vfp_size, mcp->mc_vfp_size);
 	} else if (mcp->mc_vfp_size != 0 && mcp->mc_vfp_ptr == NULL) {
 		printf("%s: %s: mc_vfp_size != 0 but mc_vfp_ptr == NULL\n",
 		    td->td_proc->p_comm, __func__);
 	}
 #endif
 
 	/* Fetch the VFP context first so a fault leaves the trapframe intact. */
 	if (mcp->mc_vfp_size == sizeof(mc_vfp) && mcp->mc_vfp_ptr != NULL) {
 		if (copyin(mcp->mc_vfp_ptr, &mc_vfp, sizeof(mc_vfp)) != 0)
 			return (EFAULT);
 		vfp = &mc_vfp;
 	} else {
 		vfp = NULL;
 	}
 
 	tf->tf_r0 = gr[_REG_R0];
 	tf->tf_r1 = gr[_REG_R1];
 	tf->tf_r2 = gr[_REG_R2];
 	tf->tf_r3 = gr[_REG_R3];
 	tf->tf_r4 = gr[_REG_R4];
 	tf->tf_r5 = gr[_REG_R5];
 	tf->tf_r6 = gr[_REG_R6];
 	tf->tf_r7 = gr[_REG_R7];
 	tf->tf_r8 = gr[_REG_R8];
 	tf->tf_r9 = gr[_REG_R9];
 	tf->tf_r10 = gr[_REG_R10];
 	tf->tf_r11 = gr[_REG_R11];
 	tf->tf_r12 = gr[_REG_R12];
 	tf->tf_usr_sp = gr[_REG_SP];
 	tf->tf_usr_lr = gr[_REG_LR];
 	tf->tf_pc = gr[_REG_PC];
 	tf->tf_spsr = gr[_REG_CPSR];
 #ifdef VFP
 	if (vfp != NULL)
 		set_vfpcontext(td, vfp);
 #endif
 	return (0);
 }
 
 /*
  * Deliver a signal to the current thread by building a signal frame on
  * the user stack and redirecting the trapframe to the handler.
  *
  * catcher: user address of the signal handler.
  * ksi:     signal information; ksi_signo selects the signal.
  * mask:    signal mask to record in the saved user context.
  *
  * Must be called with the process lock and the sigacts mutex held (both
  * are asserted).  Both are dropped around the copyout() of the frame and
  * re-acquired before returning.  If the frame cannot be written the
  * process is terminated via sigexit(td, SIGILL).
  */
 void
 sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
 {
 	struct thread *td;
 	struct proc *p;
 	struct trapframe *tf;
 	struct sigframe *fp, frame;
 	struct sigacts *psp;
 	struct sysentvec *sysent;
 	int onstack;
 	int sig;
 
 	td = curthread;
 	p = td->td_proc;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	sig = ksi->ksi_signo;
 	psp = p->p_sigacts;
 	mtx_assert(&psp->ps_mtx, MA_OWNED);
 	tf = td->td_frame;
 	onstack = sigonstack(tf->tf_usr_sp);
 
 	CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm,
 	    catcher, sig);
 
 	/* Allocate and validate space for the signal handler context. */
 	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !(onstack) &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		/* Start from the top of the alternate signal stack. */
 		fp = (struct sigframe *)((uintptr_t)td->td_sigstk.ss_sp +
 		    td->td_sigstk.ss_size);
 #if defined(COMPAT_43)
 		td->td_sigstk.ss_flags |= SS_ONSTACK;
 #endif
 	} else
 		fp = (struct sigframe *)td->td_frame->tf_usr_sp;
 
 	/* make room on the stack */
 	fp--;
 
 	/* make the stack aligned */
 	fp = (struct sigframe *)STACKALIGN(fp);
 	/* Populate the siginfo frame. */
 	bzero(&frame, sizeof(frame));
 	get_mcontext(td, &frame.sf_uc.uc_mcontext, 0);
 #ifdef VFP
 	/* The VFP pointer stored in the frame refers to the user-space copy. */
 	get_vfpcontext(td, &frame.sf_vfp);
 	frame.sf_uc.uc_mcontext.mc_vfp_size = sizeof(fp->sf_vfp);
 	frame.sf_uc.uc_mcontext.mc_vfp_ptr = &fp->sf_vfp;
 #else
 	frame.sf_uc.uc_mcontext.mc_vfp_size = 0;
 	frame.sf_uc.uc_mcontext.mc_vfp_ptr = NULL;
 #endif
 	frame.sf_si = ksi->ksi_info;
 	frame.sf_uc.uc_sigmask = *mask;
 	frame.sf_uc.uc_stack = td->td_sigstk;
 	frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) != 0 ?
 	    (onstack ? SS_ONSTACK : 0) : SS_DISABLE;
 	/* Drop both locks: copyout() touches user memory. */
 	mtx_unlock(&psp->ps_mtx);
 	PROC_UNLOCK(td->td_proc);
 
 	/* Copy the sigframe out to the user's stack. */
 	if (copyout(&frame, fp, sizeof(*fp)) != 0) {
 		/* Process has trashed its stack. Kill it. */
 		CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp);
 		PROC_LOCK(p);
 		sigexit(td, SIGILL);
 	}
 
 	/*
 	 * Build context to run handler in.  We invoke the handler
 	 * directly, only returning via the trampoline.  Note the
 	 * trampoline version numbers are coordinated with machine-
 	 * dependent code in libc.
 	 */
 
 	/* Handler arguments: (sig, &siginfo, &ucontext). */
 	tf->tf_r0 = sig;
 	tf->tf_r1 = (register_t)&fp->sf_si;
 	tf->tf_r2 = (register_t)&fp->sf_uc;
 
 	/* the trampoline uses r5 as the uc address */
 	tf->tf_r5 = (register_t)&fp->sf_uc;
 	tf->tf_pc = (register_t)catcher;
 	tf->tf_usr_sp = (register_t)fp;
 	/* The handler returns into the signal trampoline. */
 	sysent = p->p_sysent;
 	if (sysent->sv_sigcode_base != 0)
 		tf->tf_usr_lr = (register_t)sysent->sv_sigcode_base;
 	else
 		tf->tf_usr_lr = (register_t)(sysent->sv_psstrings -
 		    *(sysent->sv_szsigcode));
 	/* Set the mode to enter in the signal handler */
 #if __ARM_ARCH >= 7
 	/* Bit 0 of the handler address selects Thumb state. */
 	if ((register_t)catcher & 1)
 		tf->tf_spsr |= PSR_T;
 	else
 		tf->tf_spsr &= ~PSR_T;
 #endif
 
 	CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_usr_lr,
 	    tf->tf_usr_sp);
 
 	PROC_LOCK(p);
 	mtx_lock(&psp->ps_mtx);
 }
 
 /*
  * sigreturn(2): restore the register context and signal mask from the
  * user-supplied ucontext.
  *
  * uap->sigcntxp is a user pointer to the saved context
  * (const struct __ucontext *).
  *
  * Returns EFAULT if uap is NULL or the context cannot be copied in, the
  * set_mcontext() error if the register context is rejected, and
  * EJUSTRETURN on success.
  */
 int
 sys_sigreturn(struct thread *td, struct sigreturn_args *uap)
 {
 	ucontext_t uc;
 	int error;
 
 	if (uap == NULL)
 		return (EFAULT);
 	if (copyin(uap->sigcntxp, &uc, sizeof(uc)))
 		return (EFAULT);
 	/* Restore register context. */
 	error = set_mcontext(td, &uc.uc_mcontext);
 	if (error != 0)
 		return (error);
 
 	/* Restore signal mask. */
 	kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0);
 
 	return (EJUSTRETURN);
 }
 
 /*
  * Construct a PCB from a trapframe. This is called from kdb_trap() where
  * we want to start a backtrace from the function that caused us to enter
  * the debugger. We have the context in the trapframe, but base the trace
  * on the PCB. The PCB doesn't have to be perfect, as long as it contains
  * enough for a backtrace.
  *
  * Only r4-r12, pc, lr and sp are filled in; lr and sp are taken from the
  * trapframe's tf_usr_lr and tf_usr_sp fields.
  */
 void
 makectx(struct trapframe *tf, struct pcb *pcb)
 {
 	pcb->pcb_regs.sf_r4 = tf->tf_r4;
 	pcb->pcb_regs.sf_r5 = tf->tf_r5;
 	pcb->pcb_regs.sf_r6 = tf->tf_r6;
 	pcb->pcb_regs.sf_r7 = tf->tf_r7;
 	pcb->pcb_regs.sf_r8 = tf->tf_r8;
 	pcb->pcb_regs.sf_r9 = tf->tf_r9;
 	pcb->pcb_regs.sf_r10 = tf->tf_r10;
 	pcb->pcb_regs.sf_r11 = tf->tf_r11;
 	pcb->pcb_regs.sf_r12 = tf->tf_r12;
 	pcb->pcb_regs.sf_pc = tf->tf_pc;
 	pcb->pcb_regs.sf_lr = tf->tf_usr_lr;
 	pcb->pcb_regs.sf_sp = tf->tf_usr_sp;
 }
 
 /*
  * Set up the per-CPU data for CPU 0 and make thread0 the current thread.
  */
 void
 pcpu0_init(void)
 {
 	set_curthread(&thread0);
 	pcpu_init(pcpup, 0, sizeof(struct pcpu));
 	/* Keep only the low 24 bits of the MPIDR value. */
 	pcpup->pc_mpidr = cp15_mpidr_get() & 0xFFFFFF;
 	PCPU_SET(curthread, &thread0);
 }
 
 /*
  * Initialize proc0
  *
  * Links proc0 and thread0 together, attaches the kernel stack at
  * 'kstack', and initialises thread0's PCB and trapframe pointers.
  */
 void
 init_proc0(vm_offset_t kstack)
 {
 	proc_linkup0(&proc0, &thread0);
 	thread0.td_kstack = kstack;
 	thread0.td_kstack_pages = kstack_pages;
 	/* The PCB occupies the last sizeof(struct pcb) bytes of the stack area. */
 	thread0.td_pcb = (struct pcb *)(thread0.td_kstack +
 	    thread0.td_kstack_pages * PAGE_SIZE) - 1;
 	thread0.td_pcb->pcb_flags = 0;
 	/* NOTE(review): -1 presumably means "VFP state not live on any CPU" -- confirm. */
 	thread0.td_pcb->pcb_vfpcpu = -1;
 	thread0.td_pcb->pcb_vfpstate.fpscr = VFPSCR_DN;
 	thread0.td_frame = &proc0_tf;
 	pcpup->pc_curpcb = thread0.td_pcb;
 }
 
 /*
  * Set the stack pointers of the IRQ, abort and undefined modes for the
  * given CPU.  Each pointer is placed at the upper end of that CPU's
  * slot within the corresponding per-mode stack area.
  */
 void
 set_stackptrs(int cpu)
 {
 	const int slot_end = cpu + 1;
 
 	set_stackptr(PSR_IRQ32_MODE,
 	    irqstack + ((IRQ_STACK_SIZE * PAGE_SIZE) * slot_end));
 	set_stackptr(PSR_ABT32_MODE,
 	    abtstack + ((ABT_STACK_SIZE * PAGE_SIZE) * slot_end));
 	set_stackptr(PSR_UND32_MODE,
 	    undstack + ((UND_STACK_SIZE * PAGE_SIZE) * slot_end));
 }
 
 /*
  * Initialise the kernel debugger framework and, on KDB kernels, enter
  * the debugger immediately if the RB_KDB boot flag is set.
  */
 static void
 arm_kdb_init(void)
 {
 
 	kdb_init();
 #ifdef KDB
 	if (boothowto & RB_KDB)
 		kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
 #endif
 }
 
 #ifdef FDT
 /*
  * Early machine-dependent bootstrap for FDT-based kernels.
  *
  * abp: boot parameters handed over by the startup code; abp_physaddr is
  *      the physical address the kernel was loaded at.
  *
  * Locates the device tree, registers physical memory, switches to the
  * kernel page tables, sets up per-CPU data, exception-mode stacks, the
  * console, proc0 and the pmap, then hands the remaining memory to the
  * VM system.
  *
  * Returns the STACKALIGN()ed address of thread0's PCB (presumably used
  * by the caller as the initial kernel stack pointer -- confirm).
  */
 void *
 initarm(struct arm_boot_params *abp)
 {
 	struct mem_region mem_regions[FDT_MEM_REGIONS];
 	vm_paddr_t lastaddr;
 	vm_offset_t dtbp, kernelstack, dpcpu;
 	char *env;
 	void *kmdp;
 	int err_devmap, mem_regions_sz;
 	phandle_t root;
 	char dts_version[255];
 #ifdef EFI
 	struct efi_map_header *efihdr;
 #endif
 
 	/* get last allocated physical address */
 	arm_physmem_kernaddr = abp->abp_physaddr;
 	/* parse_boot_param() yields a kernel VA; convert it to a physical address. */
 	lastaddr = parse_boot_param(abp) - KERNVIRTADDR + arm_physmem_kernaddr;
 
 	set_cpufuncs();
 	cpuinfo_init();
 
 	/*
 	 * Find the dtb passed in by the boot loader.
 	 */
 	kmdp = preload_search_by_type("elf kernel");
 	dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t);
 #if defined(FDT_DTB_STATIC)
 	/*
 	 * In case the device tree blob was not retrieved (from metadata) try
 	 * to use the statically embedded one.
 	 */
 	if (dtbp == (vm_offset_t)NULL)
 		dtbp = (vm_offset_t)&fdt_static_dtb;
 #endif
 
 	if (OF_install(OFW_FDT, 0) == FALSE)
 		panic("Cannot install FDT");
 
 	if (OF_init((void *)dtbp) != 0)
 		panic("OF_init failed with the found device tree");
 
 #if defined(LINUX_BOOT_ABI)
 	arm_parse_fdt_bootargs();
 #endif
 
 #ifdef EFI
 	/* Prefer the EFI memory map when the loader supplied one. */
 	efihdr = (struct efi_map_header *)preload_search_info(kmdp,
 	    MODINFO_METADATA | MODINFOMD_EFI_MAP);
 	if (efihdr != NULL) {
 		arm_add_efi_map_entries(efihdr, mem_regions, &mem_regions_sz);
 	} else
 #endif
 	{
 		/* Grab physical memory regions information from device tree. */
 		if (fdt_get_mem_regions(mem_regions, &mem_regions_sz,NULL) != 0)
 			panic("Cannot get physical memory regions");
 	}
 	physmem_hardware_regions(mem_regions, mem_regions_sz);
 
 	/* Grab reserved memory regions information from device tree. */
 	if (fdt_get_reserved_regions(mem_regions, &mem_regions_sz) == 0)
 		physmem_exclude_regions(mem_regions, mem_regions_sz,
 		    EXFLAG_NODUMP | EXFLAG_NOALLOC);
 
 	/*
 	 * Set TEX remapping registers.
 	 * Setup kernel page tables and switch to kernel L1 page table.
 	 */
 	pmap_set_tex();
 	pmap_bootstrap_prepare(lastaddr);
 
 	/*
 	 * If EARLY_PRINTF support is enabled, we need to re-establish the
 	 * mapping after pmap_bootstrap_prepare() switches to new page tables.
 	 * Note that we can only do the remapping if the VA is outside the
 	 * kernel, now that we have real virtual (not VA=PA) mappings in effect.
 	 * Early printf does not work between the time pmap_set_tex() does
 	 * cp15_prrr_set() and this code remaps the VA.
 	 */
 #if defined(EARLY_PRINTF) && defined(SOCDEV_PA) && defined(SOCDEV_VA) && SOCDEV_VA < KERNBASE
 	pmap_preboot_map_attr(SOCDEV_PA, SOCDEV_VA, 1024 * 1024, 
 	    VM_PROT_READ | VM_PROT_WRITE, VM_MEMATTR_DEVICE);
 #endif
 
 	/*
 	 * Now that proper page tables are installed, call cpu_setup() to enable
 	 * instruction and data caches and other chip-specific features.
 	 */
 	cpu_setup();
 
 	/* Platform-specific initialisation */
 	platform_probe_and_attach();
 	pcpu0_init();
 
 	/* Do basic tuning, hz etc */
 	init_param1();
 
 	/*
 	 * Allocate a page for the system page mapped to 0xffff0000
 	 * This page will just contain the system vectors and can be
 	 * shared by all processes.
 	 */
 	systempage = pmap_preboot_get_pages(1);
 
 	/* Map the vector page. */
 	pmap_preboot_map_pages(systempage, ARM_VECTORS_HIGH,  1);
 	/* Keep general KVA allocations below the vector page. */
 	if (virtual_end >= ARM_VECTORS_HIGH)
 		virtual_end = ARM_VECTORS_HIGH - 1;
 
 	/* Allocate dynamic per-cpu area. */
 	dpcpu = pmap_preboot_get_vpages(DPCPU_SIZE / PAGE_SIZE);
 	dpcpu_init((void *)dpcpu, 0);
 
 	/* Allocate stacks for all modes */
 	irqstack    = pmap_preboot_get_vpages(IRQ_STACK_SIZE * MAXCPU);
 	abtstack    = pmap_preboot_get_vpages(ABT_STACK_SIZE * MAXCPU);
 	undstack    = pmap_preboot_get_vpages(UND_STACK_SIZE * MAXCPU );
 	kernelstack = pmap_preboot_get_vpages(kstack_pages);
 
 	/* Allocate message buffer. */
 	msgbufp = (void *)pmap_preboot_get_vpages(
 	    round_page(msgbufsize) / PAGE_SIZE);
 
 	/*
 	 * Pages were allocated during the secondary bootstrap for the
 	 * stacks for different CPU modes.
 	 * We must now set the r13 registers in the different CPU modes to
 	 * point to these stacks.
 	 * Since the ARM stacks use STMFD etc. we must set r13 to the top end
 	 * of the stack memory.
 	 */
 	set_stackptrs(0);
 	mutex_init();
 
 	/* Establish static device mappings. */
 	/* A failure here is only reported later, once the console is up. */
 	err_devmap = platform_devmap_init();
 	devmap_bootstrap(0, NULL);
 	vm_max_kernel_address = platform_lastaddr();
 
 	/*
 	 * Only after the SOC registers block is mapped we can perform device
 	 * tree fixups, as they may attempt to read parameters from hardware.
 	 */
 	OF_interpret("perform-fixup", 0);
 	platform_gpio_init();
 	cninit();
 
 	/*
 	 * If we made a mapping for EARLY_PRINTF after pmap_bootstrap_prepare(),
 	 * undo it now that the normal console printf works.
 	 */
 #if defined(EARLY_PRINTF) && defined(SOCDEV_PA) && defined(SOCDEV_VA) && SOCDEV_VA < KERNBASE
 	pmap_kremove(SOCDEV_VA);
 #endif
 
 	debugf("initarm: console initialized\n");
 	debugf(" arg1 kmdp = 0x%08x\n", (uint32_t)kmdp);
 	debugf(" boothowto = 0x%08x\n", boothowto);
 	debugf(" dtbp = 0x%08x\n", (uint32_t)dtbp);
 	debugf(" lastaddr1: 0x%08x\n", lastaddr);
 	arm_print_kenv();
 
 	env = kern_getenv("kernelname");
 	if (env != NULL)
 		strlcpy(kernelname, env, sizeof(kernelname));
 
 	if (err_devmap != 0)
 		printf("WARNING: could not fully configure devmap, error=%d\n",
 		    err_devmap);
 
 	platform_late_init();
 
 	/* Warn if the DTB's recorded version differs from what this kernel expects. */
 	root = OF_finddevice("/");
 	if (OF_getprop(root, "freebsd,dts-version", dts_version, sizeof(dts_version)) > 0) {
 		if (strcmp(LINUX_DTS_VERSION, dts_version) != 0)
 			printf("WARNING: DTB version is %s while kernel expects %s, "
 			    "please update the DTB in the ESP\n",
 			    dts_version,
 			    LINUX_DTS_VERSION);
 	} else {
 		printf("WARNING: Cannot find freebsd,dts-version property, "
 		    "cannot check DTB compliance\n");
 	}
 
 	/*
 	 * We must now clean the cache again....
 	 * Cleaning may be done by reading new data to displace any
 	 * dirty data in the cache. This will have happened in cpu_setttb()
 	 * but since we are boot strapping the addresses used for the read
 	 * may have just been remapped and thus the cache could be out
 	 * of sync. A re-clean after the switch will cure this.
 	 * After booting there are no gross relocations of the kernel thus
 	 * this problem will not occur after initarm().
 	 *
 	 * NOTE(review): no cache maintenance call follows this comment in
 	 * the code below; the text appears to be historical -- confirm.
 	 */
 	/* Set stack for exception handlers */
 	undefined_init();
 	init_proc0(kernelstack);
 	arm_vector_init(ARM_VECTORS_HIGH, ARM_VEC_ALL);
 	enable_interrupts(PSR_A);
 	pmap_bootstrap(0);
 
 	/* Exclude the kernel (and all the things we allocated which immediately
 	 * follow the kernel) from the VM allocation pool but not from crash
 	 * dumps.  virtual_avail is a global variable which tracks the kva we've
 	 * "allocated" while setting up pmaps.
 	 *
 	 * Prepare the list of physical memory available to the vm subsystem.
 	 */
 	physmem_exclude_region(abp->abp_physaddr,
 		pmap_preboot_get_pages(0) - abp->abp_physaddr, EXFLAG_NOALLOC);
 	physmem_init_kernel_globals();
 
 	init_param2(physmem);
 	/* Init message buffer. */
 	msgbufinit(msgbufp, msgbufsize);
 	dbg_monitor_init();
 	arm_kdb_init();
 	/* Apply possible BP hardening. */
 	cpuinfo_init_bp_hardening();
 	return ((void *)STACKALIGN(thread0.td_pcb));
 
 }
 #endif /* FDT */
diff --git a/sys/arm/arm/support.S b/sys/arm/arm/support.S
index a3ee3c3b93ba..7a39c283aed1 100644
--- a/sys/arm/arm/support.S
+++ b/sys/arm/arm/support.S
@@ -1,2122 +1,2081 @@
 /*-
  * Copyright (c) 2004 Olivier Houchard
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 /*
  * Copyright 2003 Wasabi Systems, Inc.
  * All rights reserved.
  *
  * Written by Steve C. Woodford for Wasabi Systems, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *      This product includes software developed for the NetBSD Project by
  *      Wasabi Systems, Inc.
  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
  *    or promote products derived from this software without specific prior
  *    written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 /*
  * Copyright (c) 1997 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Neil A. Carson and Mark Brinicombe
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <machine/asm.h>
 __FBSDID("$FreeBSD$");
 
 #include "assym.inc"
 
 	.syntax	unified
 
-.L_arm_memcpy:
-	.word	_C_LABEL(_arm_memcpy)
-.L_arm_bzero:
-	.word	_C_LABEL(_arm_bzero)
-.L_min_memcpy_size:
-	.word	_C_LABEL(_min_memcpy_size)
-.L_min_bzero_size:
-	.word	_C_LABEL(_min_bzero_size)
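 /*
  * memset: Sets a block of memory to the specified value
  *
  * On entry:
  *   r0 - dest address
  *   r1 - byte to write
  *   r2 - number of bytes to write
  *
  * On exit:
  *   r0 - dest address
  *
  * bzero shares the same store loop: it takes the dest address in r0 and
  * the number of bytes in r1, and always writes zero.
  */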
 /* LINTSTUB: Func: void bzero(void *, size_t) */
 ENTRY(bzero)
-	ldr	r3, .L_arm_bzero
-	ldr	r3, [r3]
-	cmp	r3, #0
-	beq	.Lnormal0
-	ldr	r2, .L_min_bzero_size
-	ldr	r2, [r2]
-	cmp	r1, r2
-	blt	.Lnormal0
-	stmfd	sp!, {r0, r1, lr}
-	mov	r2, #0
-	mov	lr, pc
-	mov	pc, r3
-	cmp	r0, #0
-	ldmfd	sp!, {r0, r1, lr}
-	RETeq
-.Lnormal0:
 	/*
 	 * r0 (dest) and r1 (length) are already where do_memset expects
 	 * them; only the fill value in r3 has to be set up.
 	 */
 	mov	r3, #0x00
 	b	do_memset
 END(bzero)
 /* LINTSTUB: Func: void *memset(void *, int, size_t) */
 /*
  * Register use from do_memset onwards (also the entry point used by bzero):
  *   r0 - dest address, left untouched so it is the return value
  *   r1 - number of bytes still to write
  *   r2 - scratch; second half of the r2/r3 pair for the 64-bit stores
  *   r3 - fill byte, later replicated into all four byte lanes
  *   ip - current write pointer
  */
 ENTRY(memset)
 	and	r3, r1, #0xff		/* We deal with bytes */
 	mov	r1, r2
 do_memset:
 	cmp	r1, #0x04		/* Do we have less than 4 bytes */
 	mov	ip, r0
 	blt	.Lmemset_lessthanfour
 
 	/* Ok first we will word align the address */
 	ands	r2, ip, #0x03		/* Get the bottom two bits */
 	bne	.Lmemset_wordunaligned	/* The address is not word aligned */
 
 	/* We are now word aligned */
 .Lmemset_wordaligned:
 	orr	r3, r3, r3, lsl #8	/* Extend value to 16-bits */
 	tst	ip, #0x04		/* Quad-align for armv5e */
 	orr	r3, r3, r3, lsl #16	/* Extend value to 32-bits */
 	subne	r1, r1, #0x04		/* Quad-align if necessary */
 	strne	r3, [ip], #0x04
 	cmp	r1, #0x10
 	blt	.Lmemset_loop4		/* If less than 16 then use words */
 	mov	r2, r3			/* Duplicate data */
 	cmp	r1, #0x80		/* If < 128 then skip the big loop */
 	blt	.Lmemset_loop32
 
 	/* Do 128 bytes at a time */
 .Lmemset_loop128:
 	subs	r1, r1, #0x80
 	strdge	r2, [ip], #0x08
 	strdge	r2, [ip], #0x08
 	strdge	r2, [ip], #0x08
 	strdge	r2, [ip], #0x08
 	strdge	r2, [ip], #0x08
 	strdge	r2, [ip], #0x08
 	strdge	r2, [ip], #0x08
 	strdge	r2, [ip], #0x08
 	strdge	r2, [ip], #0x08
 	strdge	r2, [ip], #0x08
 	strdge	r2, [ip], #0x08
 	strdge	r2, [ip], #0x08
 	strdge	r2, [ip], #0x08
 	strdge	r2, [ip], #0x08
 	strdge	r2, [ip], #0x08
 	strdge	r2, [ip], #0x08
 	bgt	.Lmemset_loop128
 	RETeq			/* Zero length so just exit */
 
 	add	r1, r1, #0x80		/* Adjust for extra sub */
 
 	/* Do 32 bytes at a time */
 .Lmemset_loop32:
 	subs	r1, r1, #0x20
 	strdge	r2, [ip], #0x08
 	strdge	r2, [ip], #0x08
 	strdge	r2, [ip], #0x08
 	strdge	r2, [ip], #0x08
 	bgt	.Lmemset_loop32
 	RETeq			/* Zero length so just exit */
 
 	adds	r1, r1, #0x10		/* Partially adjust for extra sub */
 
 	/* Deal with 16 bytes or more */
 	strdge	r2, [ip], #0x08
 	strdge	r2, [ip], #0x08
 	RETeq			/* Zero length so just exit */
 
 	addlt	r1, r1, #0x10		/* Possibly adjust for extra sub */
 
 	/* We have at least 4 bytes so copy as words */
 .Lmemset_loop4:
 	subs	r1, r1, #0x04
 	strge	r3, [ip], #0x04
 	bgt	.Lmemset_loop4
 	RETeq			/* Zero length so just exit */
 
 	/* Compensate for 64-bit alignment check */
 	adds	r1, r1, #0x04
 	RETeq
 	cmp	r1, #2
 
 	/* 1..3 trailing bytes: the flags from the cmp pick how many are stored */
 	strb	r3, [ip], #0x01		/* Set 1 byte */
 	strbge	r3, [ip], #0x01		/* Set another byte */
 	strbgt	r3, [ip]		/* and a third */
 	RET			/* Exit */
 
 .Lmemset_wordunaligned:
 	rsb	r2, r2, #0x004		/* r2 = bytes needed to reach alignment */
 	strb	r3, [ip], #0x01		/* Set 1 byte */
 	cmp	r2, #0x02
 	strbge	r3, [ip], #0x01		/* Set another byte */
 	sub	r1, r1, r2
 	strbgt	r3, [ip], #0x01		/* and a third */
 	cmp	r1, #0x04		/* More than 4 bytes left? */
 	bge	.Lmemset_wordaligned	/* Yup */
 
 .Lmemset_lessthanfour:
 	cmp	r1, #0x00
 	RETeq			/* Zero length so exit */
 	strb	r3, [ip], #0x01		/* Set 1 byte */
 	cmp	r1, #0x02
 	strbge	r3, [ip], #0x01		/* Set another byte */
 	strbgt	r3, [ip]		/* and a third */
 	RET			/* Exit */
 END(memset)
 
 /*
  * bcmp: compare two byte ranges.
  *
  * On entry:
  *   r0 - first buffer (moved to ip straight away)
  *   r1 - second buffer
  *   r2 - number of bytes to compare
  *
  * On exit:
  *   r0 - 0 if the ranges match, otherwise the difference between the
  *        first pair of differing bytes (first minus second)
  */
 ENTRY(bcmp)
 	mov	ip, r0
 	cmp	r2, #0x06
 	beq	.Lmemcmp_6bytes
 	mov	r0, #0x00
 
 	/* Are both addresses aligned the same way? */
 	cmp	r2, #0x00
 	eorsne	r3, ip, r1
 	RETeq			/* len == 0, or same addresses! */
 	tst	r3, #0x03
 	subne	r2, r2, #0x01
 	bne	.Lmemcmp_bytewise2	/* Badly aligned. Do it the slow way */
 
 	/* Word-align the addresses, if necessary */
 	/*
 	 * Computed jump: r3 = ((r1 - 5) & 3) * 3, scaled by 8 below, so each
 	 * step skips 24 bytes.  This relies on every byte-compare stanza
 	 * that follows being six 4-byte instructions -- keep them that size.
 	 */
 	sub	r3, r1, #0x05
 	ands	r3, r3, #0x03
 	add	r3, r3, r3, lsl #1
 	addne	pc, pc, r3, lsl #3
 	nop
 
 	/* Compare up to 3 bytes */
 	ldrb	r0, [ip], #0x01
 	ldrb	r3, [r1], #0x01
 	subs	r0, r0, r3
 	RETne
 	subs	r2, r2, #0x01
 	RETeq
 
 	/* Compare up to 2 bytes */
 	ldrb	r0, [ip], #0x01
 	ldrb	r3, [r1], #0x01
 	subs	r0, r0, r3
 	RETne
 	subs	r2, r2, #0x01
 	RETeq
 
 	/* Compare 1 byte */
 	ldrb	r0, [ip], #0x01
 	ldrb	r3, [r1], #0x01
 	subs	r0, r0, r3
 	RETne
 	subs	r2, r2, #0x01
 	RETeq
 
 	/* Compare 4 bytes at a time, if possible */
 	subs	r2, r2, #0x04
 	bcc	.Lmemcmp_bytewise
 .Lmemcmp_word_aligned:
 	ldr	r0, [ip], #0x04
 	ldr	r3, [r1], #0x04
 	subs	r2, r2, #0x04
 	cmpcs	r0, r3
 	beq	.Lmemcmp_word_aligned
 	sub	r0, r0, r3
 
 	/* Correct for extra subtraction, and check if done */
 	adds	r2, r2, #0x04
 	cmpeq	r0, #0x00		/* If done, did all bytes match? */
 	RETeq			/* Yup. Just return */
 
 	/* Re-do the final word byte-wise */
 	sub	ip, ip, #0x04
 	sub	r1, r1, #0x04
 
 .Lmemcmp_bytewise:
 	add	r2, r2, #0x03
 	/* Byte loop: runs until the count borrows or a pair of bytes differs */
 .Lmemcmp_bytewise2:
 	ldrb	r0, [ip], #0x01
 	ldrb	r3, [r1], #0x01
 	subs	r2, r2, #0x01
 	cmpcs	r0, r3
 	beq	.Lmemcmp_bytewise2
 	sub	r0, r0, r3
 	RET
 
 	/*
 	 * 6 byte compares are very common, thanks to the network stack.
 	 * This code is hand-scheduled to reduce the number of stalls for
 	 * load results. Everything else being equal, this will be ~32%
 	 * faster than a byte-wise memcmp.
 	 */
 	.align	5
 .Lmemcmp_6bytes:
 	ldrb	r3, [r1, #0x00]		/* r3 = b2#0 */
 	ldrb	r0, [ip, #0x00]		/* r0 = b1#0 */
 	ldrb	r2, [r1, #0x01]		/* r2 = b2#1 */
 	subs	r0, r0, r3		/* r0 = b1#0 - b2#0 */
 	ldrbeq	r3, [ip, #0x01]		/* r3 = b1#1 */
 	RETne			/* Return if mismatch on #0 */
 	subs	r0, r3, r2		/* r0 = b1#1 - b2#1 */
 	ldrbeq	r3, [r1, #0x02]		/* r3 = b2#2 */
 	ldrbeq	r0, [ip, #0x02]		/* r0 = b1#2 */
 	RETne			/* Return if mismatch on #1 */
 	ldrb	r2, [r1, #0x03]		/* r2 = b2#3 */
 	subs	r0, r0, r3		/* r0 = b1#2 - b2#2 */
 	ldrbeq	r3, [ip, #0x03]		/* r3 = b1#3 */
 	RETne			/* Return if mismatch on #2 */
 	subs	r0, r3, r2		/* r0 = b1#3 - b2#3 */
 	ldrbeq	r3, [r1, #0x04]		/* r3 = b2#4 */
 	ldrbeq	r0, [ip, #0x04]		/* r0 = b1#4 */
 	RETne			/* Return if mismatch on #3 */
 	ldrb	r2, [r1, #0x05]		/* r2 = b2#5 */
 	subs	r0, r0, r3		/* r0 = b1#4 - b2#4 */
 	ldrbeq	r3, [ip, #0x05]		/* r3 = b1#5 */
 	RETne			/* Return if mismatch on #4 */
 	sub	r0, r3, r2		/* r0 = b1#5 - b2#5 */
 	RET
 END(bcmp)
 
 ENTRY(bcopy)
 	/* switch the source and destination registers */
 	eor     r0, r1, r0
 	eor     r1, r0, r1
 	eor     r0, r1, r0
 EENTRY(memmove)
 	/* Do the buffers overlap? */
 	cmp	r0, r1
 	RETeq		/* Bail now if src/dst are the same */
 	subcc	r3, r0, r1	/* if (dst > src) r3 = dst - src */
 	subcs	r3, r1, r0	/* if (src > dsr) r3 = src - dst */
 	cmp	r3, r2		/* if (r3 < len) we have an overlap */
 	bcc	PIC_SYM(_C_LABEL(memcpy), PLT)
 
 	/* Determine copy direction */
 	cmp	r1, r0
 	bcc	.Lmemmove_backwards
 
 	moveq	r0, #0			/* Quick abort for len=0 */
 	RETeq
 
 	stmdb	sp!, {r0, lr}		/* memmove() returns dest addr */
 	subs	r2, r2, #4
 	blt	.Lmemmove_fl4		/* less than 4 bytes */
 	ands	r12, r0, #3
 	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
 	ands	r12, r1, #3
 	bne	.Lmemmove_fsrcul		/* oh unaligned source addr */
 
 .Lmemmove_ft8:
 	/* We have aligned source and destination */
 	subs	r2, r2, #8
 	blt	.Lmemmove_fl12		/* less than 12 bytes (4 from above) */
 	subs	r2, r2, #0x14
 	blt	.Lmemmove_fl32		/* less than 32 bytes (12 from above) */
 	stmdb	sp!, {r4}		/* borrow r4 */
 
 	/* blat 32 bytes at a time */
 	/* XXX for really big copies perhaps we should use more registers */
 .Lmemmove_floop32:
 	ldmia	r1!, {r3, r4, r12, lr}
 	stmia	r0!, {r3, r4, r12, lr}
 	ldmia	r1!, {r3, r4, r12, lr}
 	stmia	r0!, {r3, r4, r12, lr}
 	subs	r2, r2, #0x20
 	bge	.Lmemmove_floop32
 
 	cmn	r2, #0x10
 	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
 	stmiage	r0!, {r3, r4, r12, lr}
 	subge	r2, r2, #0x10
 	ldmia	sp!, {r4}		/* return r4 */
 
 .Lmemmove_fl32:
 	adds	r2, r2, #0x14
 
 	/* blat 12 bytes at a time */
 .Lmemmove_floop12:
 	ldmiage	r1!, {r3, r12, lr}
 	stmiage	r0!, {r3, r12, lr}
 	subsge	r2, r2, #0x0c
 	bge	.Lmemmove_floop12
 
 .Lmemmove_fl12:
 	adds	r2, r2, #8
 	blt	.Lmemmove_fl4
 
 	subs	r2, r2, #4
 	ldrlt	r3, [r1], #4
 	strlt	r3, [r0], #4
 	ldmiage	r1!, {r3, r12}
 	stmiage	r0!, {r3, r12}
 	subge	r2, r2, #4
 
 .Lmemmove_fl4:
 	/* less than 4 bytes to go */
 	adds	r2, r2, #4
 	ldmiaeq	sp!, {r0, pc}		/* done */
 
 	/* copy the crud byte at a time */
 	cmp	r2, #2
 	ldrb	r3, [r1], #1
 	strb	r3, [r0], #1
 	ldrbge	r3, [r1], #1
 	strbge	r3, [r0], #1
 	ldrbgt	r3, [r1], #1
 	strbgt	r3, [r0], #1
 	ldmia	sp!, {r0, pc}
 
 	/* erg - unaligned destination */
 .Lmemmove_fdestul:
 	rsb	r12, r12, #4
 	cmp	r12, #2
 
 	/* align destination with byte copies */
 	ldrb	r3, [r1], #1
 	strb	r3, [r0], #1
 	ldrbge	r3, [r1], #1
 	strbge	r3, [r0], #1
 	ldrbgt	r3, [r1], #1
 	strbgt	r3, [r0], #1
 	subs	r2, r2, r12
 	blt	.Lmemmove_fl4		/* less the 4 bytes */
 
 	ands	r12, r1, #3
 	beq	.Lmemmove_ft8		/* we have an aligned source */
 
 	/* erg - unaligned source */
 	/* This is where it gets nasty ... */
 .Lmemmove_fsrcul:
 	bic	r1, r1, #3
 	ldr	lr, [r1], #4
 	cmp	r12, #2
 	bgt	.Lmemmove_fsrcul3
 	beq	.Lmemmove_fsrcul2
 	cmp	r2, #0x0c
 	blt	.Lmemmove_fsrcul1loop4
 	sub	r2, r2, #0x0c
 	stmdb	sp!, {r4, r5}
 
 .Lmemmove_fsrcul1loop16:
 	mov	r3, lr, lsr #8
 	ldmia	r1!, {r4, r5, r12, lr}
 	orr	r3, r3, r4, lsl #24
 	mov	r4, r4, lsr #8
 	orr	r4, r4, r5, lsl #24
 	mov	r5, r5, lsr #8
 	orr	r5, r5, r12, lsl #24
 	mov	r12, r12, lsr #8
 	orr	r12, r12, lr, lsl #24
 	stmia	r0!, {r3-r5, r12}
 	subs	r2, r2, #0x10
 	bge	.Lmemmove_fsrcul1loop16
 	ldmia	sp!, {r4, r5}
 	adds	r2, r2, #0x0c
 	blt	.Lmemmove_fsrcul1l4
 
 .Lmemmove_fsrcul1loop4:
 	mov	r12, lr, lsr #8
 	ldr	lr, [r1], #4
 	orr	r12, r12, lr, lsl #24
 	str	r12, [r0], #4
 	subs	r2, r2, #4
 	bge	.Lmemmove_fsrcul1loop4
 
 .Lmemmove_fsrcul1l4:
 	sub	r1, r1, #3
 	b	.Lmemmove_fl4
 
 .Lmemmove_fsrcul2:
 	cmp	r2, #0x0c
 	blt	.Lmemmove_fsrcul2loop4
 	sub	r2, r2, #0x0c
 	stmdb	sp!, {r4, r5}
 
 .Lmemmove_fsrcul2loop16:
 	mov	r3, lr, lsr #16
 	ldmia	r1!, {r4, r5, r12, lr}
 	orr	r3, r3, r4, lsl #16
 	mov	r4, r4, lsr #16
 	orr	r4, r4, r5, lsl #16
 	mov	r5, r5, lsr #16
 	orr	r5, r5, r12, lsl #16
 	mov	r12, r12, lsr #16
 	orr	r12, r12, lr, lsl #16
 	stmia	r0!, {r3-r5, r12}
 	subs	r2, r2, #0x10
 	bge	.Lmemmove_fsrcul2loop16
 	ldmia	sp!, {r4, r5}
 	adds	r2, r2, #0x0c
 	blt	.Lmemmove_fsrcul2l4
 
 .Lmemmove_fsrcul2loop4:
 	mov	r12, lr, lsr #16
 	ldr	lr, [r1], #4
 	orr	r12, r12, lr, lsl #16
 	str	r12, [r0], #4
 	subs	r2, r2, #4
 	bge	.Lmemmove_fsrcul2loop4
 
 .Lmemmove_fsrcul2l4:
 	sub	r1, r1, #2
 	b	.Lmemmove_fl4
 
 .Lmemmove_fsrcul3:
 	cmp	r2, #0x0c
 	blt	.Lmemmove_fsrcul3loop4
 	sub	r2, r2, #0x0c
 	stmdb	sp!, {r4, r5}
 
 .Lmemmove_fsrcul3loop16:
 	mov	r3, lr, lsr #24
 	ldmia	r1!, {r4, r5, r12, lr}
 	orr	r3, r3, r4, lsl #8
 	mov	r4, r4, lsr #24
 	orr	r4, r4, r5, lsl #8
 	mov	r5, r5, lsr #24
 	orr	r5, r5, r12, lsl #8
 	mov	r12, r12, lsr #24
 	orr	r12, r12, lr, lsl #8
 	stmia	r0!, {r3-r5, r12}
 	subs	r2, r2, #0x10
 	bge	.Lmemmove_fsrcul3loop16
 	ldmia	sp!, {r4, r5}
 	adds	r2, r2, #0x0c
 	blt	.Lmemmove_fsrcul3l4
 
 .Lmemmove_fsrcul3loop4:
 	mov	r12, lr, lsr #24
 	ldr	lr, [r1], #4
 	orr	r12, r12, lr, lsl #8
 	str	r12, [r0], #4
 	subs	r2, r2, #4
 	bge	.Lmemmove_fsrcul3loop4
 
 .Lmemmove_fsrcul3l4:
 	sub	r1, r1, #1
 	b	.Lmemmove_fl4
 
 .Lmemmove_backwards:
 	add	r1, r1, r2
 	add	r0, r0, r2
 	subs	r2, r2, #4
 	blt	.Lmemmove_bl4		/* less than 4 bytes */
 	ands	r12, r0, #3
 	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
 	ands	r12, r1, #3
 	bne	.Lmemmove_bsrcul		/* oh unaligned source addr */
 
 .Lmemmove_bt8:
 	/* We have aligned source and destination */
 	subs	r2, r2, #8
 	blt	.Lmemmove_bl12		/* less than 12 bytes (4 from above) */
 	stmdb	sp!, {r4, lr}
 	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
 	blt	.Lmemmove_bl32
 
 	/* blat 32 bytes at a time */
 	/* XXX for really big copies perhaps we should use more registers */
 .Lmemmove_bloop32:
 	ldmdb	r1!, {r3, r4, r12, lr}
 	stmdb	r0!, {r3, r4, r12, lr}
 	ldmdb	r1!, {r3, r4, r12, lr}
 	stmdb	r0!, {r3, r4, r12, lr}
 	subs	r2, r2, #0x20
 	bge	.Lmemmove_bloop32
 
 .Lmemmove_bl32:
 	cmn	r2, #0x10
 	ldmdbge	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
 	stmdbge	r0!, {r3, r4, r12, lr}
 	subge	r2, r2, #0x10
 	adds	r2, r2, #0x14
 	ldmdbge	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
 	stmdbge	r0!, {r3, r12, lr}
 	subge	r2, r2, #0x0c
 	ldmia	sp!, {r4, lr}
 
 .Lmemmove_bl12:
 	adds	r2, r2, #8
 	blt	.Lmemmove_bl4
 	subs	r2, r2, #4
 	ldrlt	r3, [r1, #-4]!
 	strlt	r3, [r0, #-4]!
 	ldmdbge	r1!, {r3, r12}
 	stmdbge	r0!, {r3, r12}
 	subge	r2, r2, #4
 
 .Lmemmove_bl4:
 	/* less than 4 bytes to go */
 	adds	r2, r2, #4
 	RETeq			/* done */
 
 	/* copy the crud byte at a time */
 	cmp	r2, #2
 	ldrb	r3, [r1, #-1]!
 	strb	r3, [r0, #-1]!
 	ldrbge	r3, [r1, #-1]!
 	strbge	r3, [r0, #-1]!
 	ldrbgt	r3, [r1, #-1]!
 	strbgt	r3, [r0, #-1]!
 	RET
 
 	/* erg - unaligned destination */
 .Lmemmove_bdestul:
 	cmp	r12, #2
 
 	/* align destination with byte copies */
 	ldrb	r3, [r1, #-1]!
 	strb	r3, [r0, #-1]!
 	ldrbge	r3, [r1, #-1]!
 	strbge	r3, [r0, #-1]!
 	ldrbgt	r3, [r1, #-1]!
 	strbgt	r3, [r0, #-1]!
 	subs	r2, r2, r12
 	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
 	ands	r12, r1, #3
 	beq	.Lmemmove_bt8		/* we have an aligned source */
 
 	/* erg - unaligned source */
 	/* This is where it gets nasty ... */
 .Lmemmove_bsrcul:
 	bic	r1, r1, #3
 	ldr	r3, [r1, #0]
 	cmp	r12, #2
 	blt	.Lmemmove_bsrcul1
 	beq	.Lmemmove_bsrcul2
 	cmp	r2, #0x0c
 	blt	.Lmemmove_bsrcul3loop4
 	sub	r2, r2, #0x0c
 	stmdb	sp!, {r4, r5, lr}
 
 .Lmemmove_bsrcul3loop16:
 	mov	lr, r3, lsl #8
 	ldmdb	r1!, {r3-r5, r12}
 	orr	lr, lr, r12, lsr #24
 	mov	r12, r12, lsl #8
 	orr	r12, r12, r5, lsr #24
 	mov	r5, r5, lsl #8
 	orr	r5, r5, r4, lsr #24
 	mov	r4, r4, lsl #8
 	orr	r4, r4, r3, lsr #24
 	stmdb	r0!, {r4, r5, r12, lr}
 	subs	r2, r2, #0x10
 	bge	.Lmemmove_bsrcul3loop16
 	ldmia	sp!, {r4, r5, lr}
 	adds	r2, r2, #0x0c
 	blt	.Lmemmove_bsrcul3l4
 
 .Lmemmove_bsrcul3loop4:
 	mov	r12, r3, lsl #8
 	ldr	r3, [r1, #-4]!
 	orr	r12, r12, r3, lsr #24
 	str	r12, [r0, #-4]!
 	subs	r2, r2, #4
 	bge	.Lmemmove_bsrcul3loop4
 
 .Lmemmove_bsrcul3l4:
 	add	r1, r1, #3
 	b	.Lmemmove_bl4
 
 .Lmemmove_bsrcul2:
 	cmp	r2, #0x0c
 	blt	.Lmemmove_bsrcul2loop4
 	sub	r2, r2, #0x0c
 	stmdb	sp!, {r4, r5, lr}
 
 .Lmemmove_bsrcul2loop16:
 	mov	lr, r3, lsl #16
 	ldmdb	r1!, {r3-r5, r12}
 	orr	lr, lr, r12, lsr #16
 	mov	r12, r12, lsl #16
 	orr	r12, r12, r5, lsr #16
 	mov	r5, r5, lsl #16
 	orr	r5, r5, r4, lsr #16
 	mov	r4, r4, lsl #16
 	orr	r4, r4, r3, lsr #16
 	stmdb	r0!, {r4, r5, r12, lr}
 	subs	r2, r2, #0x10
 	bge	.Lmemmove_bsrcul2loop16
 	ldmia	sp!, {r4, r5, lr}
 	adds	r2, r2, #0x0c
 	blt	.Lmemmove_bsrcul2l4
 
 .Lmemmove_bsrcul2loop4:
 	mov	r12, r3, lsl #16
 	ldr	r3, [r1, #-4]!
 	orr	r12, r12, r3, lsr #16
 	str	r12, [r0, #-4]!
 	subs	r2, r2, #4
 	bge	.Lmemmove_bsrcul2loop4
 
 .Lmemmove_bsrcul2l4:
 	add	r1, r1, #2
 	b	.Lmemmove_bl4
 
 .Lmemmove_bsrcul1:
 	cmp	r2, #0x0c
 	blt	.Lmemmove_bsrcul1loop4
 	sub	r2, r2, #0x0c
 	stmdb	sp!, {r4, r5, lr}
 
 .Lmemmove_bsrcul1loop32:
 	mov	lr, r3, lsl #24
 	ldmdb	r1!, {r3-r5, r12}
 	orr	lr, lr, r12, lsr #8
 	mov	r12, r12, lsl #24
 	orr	r12, r12, r5, lsr #8
 	mov	r5, r5, lsl #24
 	orr	r5, r5, r4, lsr #8
 	mov	r4, r4, lsl #24
 	orr	r4, r4, r3, lsr #8
 	stmdb	r0!, {r4, r5, r12, lr}
 	subs	r2, r2, #0x10
 	bge	.Lmemmove_bsrcul1loop32
 	ldmia	sp!, {r4, r5, lr}
 	adds	r2, r2, #0x0c
 	blt	.Lmemmove_bsrcul1l4
 
 .Lmemmove_bsrcul1loop4:
 	mov	r12, r3, lsl #24
 	ldr	r3, [r1, #-4]!
 	orr	r12, r12, r3, lsr #8
 	str	r12, [r0, #-4]!
 	subs	r2, r2, #4
 	bge	.Lmemmove_bsrcul1loop4
 
 .Lmemmove_bsrcul1l4:
 	add	r1, r1, #1
 	b	.Lmemmove_bl4
 EEND(memmove)
 END(bcopy)
 
 /* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
 ENTRY(memcpy)
 	pld	[r1]
 	cmp	r2, #0x0c
 	ble	.Lmemcpy_short		/* <= 12 bytes */
 #ifdef FLASHADDR
 #if FLASHADDR > PHYSADDR
 	ldr	r3, =FLASHADDR
 	cmp	r3, pc
 	bls	.Lnormal
 #else
 	ldr	r3, =FLASHADDR
 	cmp	r3, pc
 	bhi	.Lnormal
 #endif
 #endif
-	ldr	r3, .L_arm_memcpy
-	ldr	r3, [r3]
-	cmp	r3, #0
-	beq	.Lnormal
-	ldr	r3, .L_min_memcpy_size
-	ldr	r3, [r3]
-	cmp	r2, r3
-	blt	.Lnormal
-	stmfd	sp!, {r0-r2, r4, lr}
-	mov	r3, #0
-	ldr	r4, .L_arm_memcpy
-	mov	lr, pc
-	ldr	pc, [r4]
-	cmp	r0, #0
-	ldmfd	sp!, {r0-r2, r4, lr}
-	RETeq
-.Lnormal:
 	mov	r3, r0			/* We must not clobber r0 */
 
 	/* Word-align the destination buffer */
 	ands	ip, r3, #0x03		/* Already word aligned? */
 	beq	.Lmemcpy_wordaligned	/* Yup */
 	cmp	ip, #0x02
 	ldrb	ip, [r1], #0x01
 	sub	r2, r2, #0x01
 	strb	ip, [r3], #0x01
 	ldrble	ip, [r1], #0x01
 	suble	r2, r2, #0x01
 	strble	ip, [r3], #0x01
 	ldrblt	ip, [r1], #0x01
 	sublt	r2, r2, #0x01
 	strblt	ip, [r3], #0x01
 
 	/* Destination buffer is now word aligned */
 .Lmemcpy_wordaligned:
 	ands	ip, r1, #0x03		/* Is src also word-aligned? */
 	bne	.Lmemcpy_bad_align	/* Nope. Things just got bad */
 
 	/* Quad-align the destination buffer */
 	tst	r3, #0x07		/* Already quad aligned? */
 	ldrne	ip, [r1], #0x04
 	stmfd	sp!, {r4-r9}		/* Free up some registers */
 	subne	r2, r2, #0x04
 	strne	ip, [r3], #0x04
 
 	/* Destination buffer quad aligned, source is at least word aligned */
 	subs	r2, r2, #0x80
 	blt	.Lmemcpy_w_lessthan128
 
 	/* Copy 128 bytes at a time */
 .Lmemcpy_w_loop128:
 	ldr	r4, [r1], #0x04		/* LD:00-03 */
 	ldr	r5, [r1], #0x04		/* LD:04-07 */
 	pld	[r1, #0x18]		/* Prefetch 0x20 */
 	ldr	r6, [r1], #0x04		/* LD:08-0b */
 	ldr	r7, [r1], #0x04		/* LD:0c-0f */
 	ldr	r8, [r1], #0x04		/* LD:10-13 */
 	ldr	r9, [r1], #0x04		/* LD:14-17 */
 	strd	r4, [r3], #0x08		/* ST:00-07 */
 	ldr	r4, [r1], #0x04		/* LD:18-1b */
 	ldr	r5, [r1], #0x04		/* LD:1c-1f */
 	strd	r6, [r3], #0x08		/* ST:08-0f */
 	ldr	r6, [r1], #0x04		/* LD:20-23 */
 	ldr	r7, [r1], #0x04		/* LD:24-27 */
 	pld	[r1, #0x18]		/* Prefetch 0x40 */
 	strd	r8, [r3], #0x08		/* ST:10-17 */
 	ldr	r8, [r1], #0x04		/* LD:28-2b */
 	ldr	r9, [r1], #0x04		/* LD:2c-2f */
 	strd	r4, [r3], #0x08		/* ST:18-1f */
 	ldr	r4, [r1], #0x04		/* LD:30-33 */
 	ldr	r5, [r1], #0x04		/* LD:34-37 */
 	strd	r6, [r3], #0x08		/* ST:20-27 */
 	ldr	r6, [r1], #0x04		/* LD:38-3b */
 	ldr	r7, [r1], #0x04		/* LD:3c-3f */
 	strd	r8, [r3], #0x08		/* ST:28-2f */
 	ldr	r8, [r1], #0x04		/* LD:40-43 */
 	ldr	r9, [r1], #0x04		/* LD:44-47 */
 	pld	[r1, #0x18]		/* Prefetch 0x60 */
 	strd	r4, [r3], #0x08		/* ST:30-37 */
 	ldr	r4, [r1], #0x04		/* LD:48-4b */
 	ldr	r5, [r1], #0x04		/* LD:4c-4f */
 	strd	r6, [r3], #0x08		/* ST:38-3f */
 	ldr	r6, [r1], #0x04		/* LD:50-53 */
 	ldr	r7, [r1], #0x04		/* LD:54-57 */
 	strd	r8, [r3], #0x08		/* ST:40-47 */
 	ldr	r8, [r1], #0x04		/* LD:58-5b */
 	ldr	r9, [r1], #0x04		/* LD:5c-5f */
 	strd	r4, [r3], #0x08		/* ST:48-4f */
 	ldr	r4, [r1], #0x04		/* LD:60-63 */
 	ldr	r5, [r1], #0x04		/* LD:64-67 */
 	pld	[r1, #0x18]		/* Prefetch 0x80 */
 	strd	r6, [r3], #0x08		/* ST:50-57 */
 	ldr	r6, [r1], #0x04		/* LD:68-6b */
 	ldr	r7, [r1], #0x04		/* LD:6c-6f */
 	strd	r8, [r3], #0x08		/* ST:58-5f */
 	ldr	r8, [r1], #0x04		/* LD:70-73 */
 	ldr	r9, [r1], #0x04		/* LD:74-77 */
 	strd	r4, [r3], #0x08		/* ST:60-67 */
 	ldr	r4, [r1], #0x04		/* LD:78-7b */
 	ldr	r5, [r1], #0x04		/* LD:7c-7f */
 	strd	r6, [r3], #0x08		/* ST:68-6f */
 	strd	r8, [r3], #0x08		/* ST:70-77 */
 	subs	r2, r2, #0x80
 	strd	r4, [r3], #0x08		/* ST:78-7f */
 	bge	.Lmemcpy_w_loop128
 
 .Lmemcpy_w_lessthan128:
 	adds	r2, r2, #0x80		/* Adjust for extra sub */
 	ldmfdeq	sp!, {r4-r9}
 	RETeq			/* Return now if done */
 	subs	r2, r2, #0x20
 	blt	.Lmemcpy_w_lessthan32
 
 	/* Copy 32 bytes at a time */
 .Lmemcpy_w_loop32:
 	ldr	r4, [r1], #0x04
 	ldr	r5, [r1], #0x04
 	pld	[r1, #0x18]
 	ldr	r6, [r1], #0x04
 	ldr	r7, [r1], #0x04
 	ldr	r8, [r1], #0x04
 	ldr	r9, [r1], #0x04
 	strd	r4, [r3], #0x08
 	ldr	r4, [r1], #0x04
 	ldr	r5, [r1], #0x04
 	strd	r6, [r3], #0x08
 	strd	r8, [r3], #0x08
 	subs	r2, r2, #0x20
 	strd	r4, [r3], #0x08
 	bge	.Lmemcpy_w_loop32
 
 .Lmemcpy_w_lessthan32:
 	adds	r2, r2, #0x20		/* Adjust for extra sub */
 	ldmfdeq	sp!, {r4-r9}
 	RETeq			/* Return now if done */
 
 	and	r4, r2, #0x18
 	rsbs	r4, r4, #0x18
 	addne	pc, pc, r4, lsl #1
 	nop
 
 	/* At least 24 bytes remaining */
 	ldr	r4, [r1], #0x04
 	ldr	r5, [r1], #0x04
 	sub	r2, r2, #0x08
 	strd	r4, [r3], #0x08
 
 	/* At least 16 bytes remaining */
 	ldr	r4, [r1], #0x04
 	ldr	r5, [r1], #0x04
 	sub	r2, r2, #0x08
 	strd	r4, [r3], #0x08
 
 	/* At least 8 bytes remaining */
 	ldr	r4, [r1], #0x04
 	ldr	r5, [r1], #0x04
 	subs	r2, r2, #0x08
 	strd	r4, [r3], #0x08
 
 	/* Less than 8 bytes remaining */
 	ldmfd	sp!, {r4-r9}
 	RETeq			/* Return now if done */
 	subs	r2, r2, #0x04
 	ldrge	ip, [r1], #0x04
 	strge	ip, [r3], #0x04
 	RETeq			/* Return now if done */
 	addlt	r2, r2, #0x04
 	ldrb	ip, [r1], #0x01
 	cmp	r2, #0x02
 	ldrbge	r2, [r1], #0x01
 	strb	ip, [r3], #0x01
 	ldrbgt	ip, [r1]
 	strbge	r2, [r3], #0x01
 	strbgt	ip, [r3]
 	RET
 /* Place a literal pool here for the above ldr instructions to use */
 .ltorg
 
 
 /*
  * At this point, it has not been possible to word align both buffers.
  * The destination buffer is word aligned, but the source buffer is not.
  */
 .Lmemcpy_bad_align:
 	stmfd	sp!, {r4-r7}
 	bic	r1, r1, #0x03
 	cmp	ip, #2
 	ldr	ip, [r1], #0x04
 	bgt	.Lmemcpy_bad3
 	beq	.Lmemcpy_bad2
 	b	.Lmemcpy_bad1
 
 .Lmemcpy_bad1_loop16:
 	mov	r4, ip, lsr #8
 	ldr	r5, [r1], #0x04
 	pld	[r1, #0x018]
 	ldr	r6, [r1], #0x04
 	ldr	r7, [r1], #0x04
 	ldr	ip, [r1], #0x04
 	orr	r4, r4, r5, lsl #24
 	mov	r5, r5, lsr #8
 	orr	r5, r5, r6, lsl #24
 	mov	r6, r6, lsr #8
 	orr	r6, r6, r7, lsl #24
 	mov	r7, r7, lsr #8
 	orr	r7, r7, ip, lsl #24
 	str	r4, [r3], #0x04
 	str	r5, [r3], #0x04
 	str	r6, [r3], #0x04
 	str	r7, [r3], #0x04
 .Lmemcpy_bad1:
 	subs	r2, r2, #0x10
 	bge	.Lmemcpy_bad1_loop16
 
 	adds	r2, r2, #0x10
 	ldmfdeq	sp!, {r4-r7}
 	RETeq			/* Return now if done */
 	subs	r2, r2, #0x04
 	sublt	r1, r1, #0x03
 	blt	.Lmemcpy_bad_done
 
 .Lmemcpy_bad1_loop4:
 	mov	r4, ip, lsr #8
 	ldr	ip, [r1], #0x04
 	subs	r2, r2, #0x04
 	orr	r4, r4, ip, lsl #24
 	str	r4, [r3], #0x04
 	bge	.Lmemcpy_bad1_loop4
 	sub	r1, r1, #0x03
 	b	.Lmemcpy_bad_done
 
 .Lmemcpy_bad2_loop16:
 	mov	r4, ip, lsr #16
 	ldr	r5, [r1], #0x04
 	pld	[r1, #0x018]
 	ldr	r6, [r1], #0x04
 	ldr	r7, [r1], #0x04
 	ldr	ip, [r1], #0x04
 	orr	r4, r4, r5, lsl #16
 	mov	r5, r5, lsr #16
 	orr	r5, r5, r6, lsl #16
 	mov	r6, r6, lsr #16
 	orr	r6, r6, r7, lsl #16
 	mov	r7, r7, lsr #16
 	orr	r7, r7, ip, lsl #16
 	str	r4, [r3], #0x04
 	str	r5, [r3], #0x04
 	str	r6, [r3], #0x04
 	str	r7, [r3], #0x04
 .Lmemcpy_bad2:
 	subs	r2, r2, #0x10
 	bge	.Lmemcpy_bad2_loop16
 
 	adds	r2, r2, #0x10
 	ldmfdeq	sp!, {r4-r7}
 	RETeq			/* Return now if done */
 	subs	r2, r2, #0x04
 	sublt	r1, r1, #0x02
 	blt	.Lmemcpy_bad_done
 
 .Lmemcpy_bad2_loop4:
 	mov	r4, ip, lsr #16
 	ldr	ip, [r1], #0x04
 	subs	r2, r2, #0x04
 	orr	r4, r4, ip, lsl #16
 	str	r4, [r3], #0x04
 	bge	.Lmemcpy_bad2_loop4
 	sub	r1, r1, #0x02
 	b	.Lmemcpy_bad_done
 
 .Lmemcpy_bad3_loop16:
 	mov	r4, ip, lsr #24
 	ldr	r5, [r1], #0x04
 	pld	[r1, #0x018]
 	ldr	r6, [r1], #0x04
 	ldr	r7, [r1], #0x04
 	ldr	ip, [r1], #0x04
 	orr	r4, r4, r5, lsl #8
 	mov	r5, r5, lsr #24
 	orr	r5, r5, r6, lsl #8
 	mov	r6, r6, lsr #24
 	orr	r6, r6, r7, lsl #8
 	mov	r7, r7, lsr #24
 	orr	r7, r7, ip, lsl #8
 	str	r4, [r3], #0x04
 	str	r5, [r3], #0x04
 	str	r6, [r3], #0x04
 	str	r7, [r3], #0x04
 .Lmemcpy_bad3:
 	subs	r2, r2, #0x10
 	bge	.Lmemcpy_bad3_loop16
 
 	adds	r2, r2, #0x10
 	ldmfdeq	sp!, {r4-r7}
 	RETeq			/* Return now if done */
 	subs	r2, r2, #0x04
 	sublt	r1, r1, #0x01
 	blt	.Lmemcpy_bad_done
 
 .Lmemcpy_bad3_loop4:
 	mov	r4, ip, lsr #24
 	ldr	ip, [r1], #0x04
 	subs	r2, r2, #0x04
 	orr	r4, r4, ip, lsl #8
 	str	r4, [r3], #0x04
 	bge	.Lmemcpy_bad3_loop4
 	sub	r1, r1, #0x01
 
 .Lmemcpy_bad_done:
 	ldmfd	sp!, {r4-r7}
 	adds	r2, r2, #0x04
 	RETeq
 	ldrb	ip, [r1], #0x01
 	cmp	r2, #0x02
 	ldrbge	r2, [r1], #0x01
 	strb	ip, [r3], #0x01
 	ldrbgt	ip, [r1]
 	strbge	r2, [r3], #0x01
 	strbgt	ip, [r3]
 	RET
 
 
 /*
  * Handle short copies (12 bytes or fewer), possibly misaligned.
  * Some of these are *very* common, thanks to the network stack,
  * and so are handled specially.
  */
 .Lmemcpy_short:
 	add	pc, pc, r2, lsl #2
 	nop
 	RET			/* 0x00 */
 	b	.Lmemcpy_bytewise	/* 0x01 */
 	b	.Lmemcpy_bytewise	/* 0x02 */
 	b	.Lmemcpy_bytewise	/* 0x03 */
 	b	.Lmemcpy_4		/* 0x04 */
 	b	.Lmemcpy_bytewise	/* 0x05 */
 	b	.Lmemcpy_6		/* 0x06 */
 	b	.Lmemcpy_bytewise	/* 0x07 */
 	b	.Lmemcpy_8		/* 0x08 */
 	b	.Lmemcpy_bytewise	/* 0x09 */
 	b	.Lmemcpy_bytewise	/* 0x0a */
 	b	.Lmemcpy_bytewise	/* 0x0b */
 	b	.Lmemcpy_c		/* 0x0c */
 .Lmemcpy_bytewise:
 	mov	r3, r0			/* We must not clobber r0 */
 	ldrb	ip, [r1], #0x01
 1:	subs	r2, r2, #0x01
 	strb	ip, [r3], #0x01
 	ldrbne	ip, [r1], #0x01
 	bne	1b
 	RET
 
 /******************************************************************************
  * Special case for 4 byte copies
  */
 #define	LMEMCPY_4_LOG2	6	/* 64 bytes */
 #define	LMEMCPY_4_PAD	.align LMEMCPY_4_LOG2
 	LMEMCPY_4_PAD
 .Lmemcpy_4:
 	and	r2, r1, #0x03
 	orr	r2, r2, r0, lsl #2
 	ands	r2, r2, #0x0f
 	sub	r3, pc, #0x14
 	addne	pc, r3, r2, lsl #LMEMCPY_4_LOG2
 
 /*
  * 0000: dst is 32-bit aligned, src is 32-bit aligned
  */
 	ldr	r2, [r1]
 	str	r2, [r0]
 	RET
 	LMEMCPY_4_PAD
 
 /*
  * 0001: dst is 32-bit aligned, src is 8-bit aligned
  */
 	ldr	r3, [r1, #-1]		/* BE:r3 = x012  LE:r3 = 210x */
 	ldr	r2, [r1, #3]		/* BE:r2 = 3xxx  LE:r2 = xxx3 */
 	mov	r3, r3, lsr #8		/* r3 = .210 */
 	orr	r3, r3, r2, lsl #24	/* r3 = 3210 */
 	str	r3, [r0]
 	RET
 	LMEMCPY_4_PAD
 
 /*
  * 0010: dst is 32-bit aligned, src is 16-bit aligned
  */
 	ldrh	r3, [r1, #0x02]
 	ldrh	r2, [r1]
 	orr	r3, r2, r3, lsl #16
 	str	r3, [r0]
 	RET
 	LMEMCPY_4_PAD
 
 /*
  * 0011: dst is 32-bit aligned, src is 8-bit aligned
  */
 	ldr	r3, [r1, #-3]		/* BE:r3 = xxx0  LE:r3 = 0xxx */
 	ldr	r2, [r1, #1]		/* BE:r2 = 123x  LE:r2 = x321 */
 	mov	r3, r3, lsr #24		/* r3 = ...0 */
 	orr	r3, r3, r2, lsl #8	/* r3 = 3210 */
 	str	r3, [r0]
 	RET
 	LMEMCPY_4_PAD
 
 /*
  * 0100: dst is 8-bit aligned, src is 32-bit aligned
  */
 	ldr	r2, [r1]
 	strb	r2, [r0]
 	mov	r3, r2, lsr #8
 	mov	r1, r2, lsr #24
 	strb	r1, [r0, #0x03]
 	strh	r3, [r0, #0x01]
 	RET
 	LMEMCPY_4_PAD
 
 /*
  * 0101: dst is 8-bit aligned, src is 8-bit aligned
  */
 	ldrb	r2, [r1]
 	ldrh	r3, [r1, #0x01]
 	ldrb	r1, [r1, #0x03]
 	strb	r2, [r0]
 	strh	r3, [r0, #0x01]
 	strb	r1, [r0, #0x03]
 	RET
 	LMEMCPY_4_PAD
 
 /*
  * 0110: dst is 8-bit aligned, src is 16-bit aligned
  */
 	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
 	ldrh	r3, [r1, #0x02]		/* BE:r3 = ..23  LE:r3 = ..32 */
 	strb	r2, [r0]
 	mov	r2, r2, lsr #8		/* r2 = ...1 */
 	orr	r2, r2, r3, lsl #8	/* r2 = .321 */
 	mov	r3, r3, lsr #8		/* r3 = ...3 */
 	strh	r2, [r0, #0x01]
 	strb	r3, [r0, #0x03]
 	RET
 	LMEMCPY_4_PAD
 
 /*
  * 0111: dst is 8-bit aligned, src is 8-bit aligned
  */
 	ldrb	r2, [r1]
 	ldrh	r3, [r1, #0x01]
 	ldrb	r1, [r1, #0x03]
 	strb	r2, [r0]
 	strh	r3, [r0, #0x01]
 	strb	r1, [r0, #0x03]
 	RET
 	LMEMCPY_4_PAD
 
 /*
  * 1000: dst is 16-bit aligned, src is 32-bit aligned
  */
 	ldr	r2, [r1]
 	strh	r2, [r0]
 	mov	r3, r2, lsr #16
 	strh	r3, [r0, #0x02]
 	RET
 	LMEMCPY_4_PAD
 
 /*
  * 1001: dst is 16-bit aligned, src is 8-bit aligned
  */
 	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
 	ldr	r3, [r1, #3]		/* BE:r3 = 3xxx  LE:r3 = xxx3 */
 	mov	r1, r2, lsr #8		/* BE:r1 = .x01  LE:r1 = .210 */
 	strh	r1, [r0]
 	mov	r2, r2, lsr #24		/* r2 = ...2 */
 	orr	r2, r2, r3, lsl #8	/* r2 = xx32 */
 	strh	r2, [r0, #0x02]
 	RET
 	LMEMCPY_4_PAD
 
 /*
  * 1010: dst is 16-bit aligned, src is 16-bit aligned
  */
 	ldrh	r2, [r1]
 	ldrh	r3, [r1, #0x02]
 	strh	r2, [r0]
 	strh	r3, [r0, #0x02]
 	RET
 	LMEMCPY_4_PAD
 
 /*
  * 1011: dst is 16-bit aligned, src is 8-bit aligned
  */
 	ldr	r3, [r1, #1]		/* BE:r3 = 123x  LE:r3 = x321 */
 	ldr	r2, [r1, #-3]		/* BE:r2 = xxx0  LE:r2 = 0xxx */
 	mov	r1, r3, lsr #8		/* BE:r1 = .123  LE:r1 = .x32 */
 	strh	r1, [r0, #0x02]
 	mov	r3, r3, lsl #8		/* r3 = 321. */
 	orr	r3, r3, r2, lsr #24	/* r3 = 3210 */
 	strh	r3, [r0]
 	RET
 	LMEMCPY_4_PAD
 
 /*
  * 1100: dst is 8-bit aligned, src is 32-bit aligned
  */
 	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
 	strb	r2, [r0]
 	mov	r3, r2, lsr #8
 	mov	r1, r2, lsr #24
 	strh	r3, [r0, #0x01]
 	strb	r1, [r0, #0x03]
 	RET
 	LMEMCPY_4_PAD
 
 /*
  * 1101: dst is 8-bit aligned, src is 8-bit aligned
  */
 	ldrb	r2, [r1]
 	ldrh	r3, [r1, #0x01]
 	ldrb	r1, [r1, #0x03]
 	strb	r2, [r0]
 	strh	r3, [r0, #0x01]
 	strb	r1, [r0, #0x03]
 	RET
 	LMEMCPY_4_PAD
 
 /*
  * 1110: dst is 8-bit aligned, src is 16-bit aligned
  */
 	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
 	ldrh	r3, [r1, #0x02]		/* BE:r3 = ..23  LE:r3 = ..32 */
 	strb	r2, [r0]
 	mov	r2, r2, lsr #8		/* r2 = ...1 */
 	orr	r2, r2, r3, lsl #8	/* r2 = .321 */
 	strh	r2, [r0, #0x01]
 	mov	r3, r3, lsr #8		/* r3 = ...3 */
 	strb	r3, [r0, #0x03]
 	RET
 	LMEMCPY_4_PAD
 
 /*
  * 1111: dst is 8-bit aligned, src is 8-bit aligned
  */
 	ldrb	r2, [r1]
 	ldrh	r3, [r1, #0x01]
 	ldrb	r1, [r1, #0x03]
 	strb	r2, [r0]
 	strh	r3, [r0, #0x01]
 	strb	r1, [r0, #0x03]
 	RET
 	LMEMCPY_4_PAD
 
 
 /******************************************************************************
  * Special case for 6 byte copies
  */
 #define	LMEMCPY_6_LOG2	6	/* 64 bytes */
 #define	LMEMCPY_6_PAD	.align LMEMCPY_6_LOG2
 	LMEMCPY_6_PAD
 .Lmemcpy_6:
 	and	r2, r1, #0x03
 	orr	r2, r2, r0, lsl #2
 	ands	r2, r2, #0x0f
 	sub	r3, pc, #0x14
 	addne	pc, r3, r2, lsl #LMEMCPY_6_LOG2
 
 /*
  * 0000: dst is 32-bit aligned, src is 32-bit aligned
  */
 	ldr	r2, [r1]
 	ldrh	r3, [r1, #0x04]
 	str	r2, [r0]
 	strh	r3, [r0, #0x04]
 	RET
 	LMEMCPY_6_PAD
 
 /*
  * 0001: dst is 32-bit aligned, src is 8-bit aligned
  */
 	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
 	ldr	r3, [r1, #0x03]		/* BE:r3 = 345x  LE:r3 = x543 */
 	mov	r2, r2, lsr #8		/* r2 = .210 */
 	orr	r2, r2, r3, lsl #24	/* r2 = 3210 */
 	mov	r3, r3, lsr #8		/* BE:r3 = .345  LE:r3 = .x54 */
 	str	r2, [r0]
 	strh	r3, [r0, #0x04]
 	RET
 	LMEMCPY_6_PAD
 
 /*
  * 0010: dst is 32-bit aligned, src is 16-bit aligned
  */
 	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
 	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
 	mov	r1, r3, lsr #16		/* r1 = ..54 */
 	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
 	str	r2, [r0]
 	strh	r1, [r0, #0x04]
 	RET
 	LMEMCPY_6_PAD
 
 /*
  * 0011: dst is 32-bit aligned, src is 8-bit aligned
  */
 	ldr	r2, [r1, #-3]		/* BE:r2 = xxx0  LE:r2 = 0xxx */
 	ldr	r3, [r1, #1]		/* BE:r3 = 1234  LE:r3 = 4321 */
 	ldr	r1, [r1, #5]		/* BE:r1 = 5xxx  LE:r1 = xxx5 */
 	mov	r2, r2, lsr #24		/* r2 = ...0 */
 	orr	r2, r2, r3, lsl #8	/* r2 = 3210 */
 	mov	r1, r1, lsl #8		/* r1 = xx5. */
 	orr	r1, r1, r3, lsr #24	/* r1 = xx54 */
 	str	r2, [r0]
 	strh	r1, [r0, #0x04]
 	RET
 	LMEMCPY_6_PAD
 
 /*
  * 0100: dst is 8-bit aligned, src is 32-bit aligned
  */
 	ldr	r3, [r1]		/* BE:r3 = 0123  LE:r3 = 3210 */
 	ldrh	r2, [r1, #0x04]		/* BE:r2 = ..45  LE:r2 = ..54 */
 	mov	r1, r3, lsr #8		/* BE:r1 = .012  LE:r1 = .321 */
 	strh	r1, [r0, #0x01]
 	strb	r3, [r0]
 	mov	r3, r3, lsr #24		/* r3 = ...3 */
 	orr	r3, r3, r2, lsl #8	/* r3 = .543 */
 	mov	r2, r2, lsr #8		/* r2 = ...5 */
 	strh	r3, [r0, #0x03]
 	strb	r2, [r0, #0x05]
 	RET
 	LMEMCPY_6_PAD
 
 /*
  * 0101: dst is 8-bit aligned, src is 8-bit aligned
  */
 	ldrb	r2, [r1]
 	ldrh	r3, [r1, #0x01]
 	ldrh	ip, [r1, #0x03]
 	ldrb	r1, [r1, #0x05]
 	strb	r2, [r0]
 	strh	r3, [r0, #0x01]
 	strh	ip, [r0, #0x03]
 	strb	r1, [r0, #0x05]
 	RET
 	LMEMCPY_6_PAD
 
 /*
  * 0110: dst is 8-bit aligned, src is 16-bit aligned
  */
 	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
 	ldr	r1, [r1, #0x02]		/* BE:r1 = 2345  LE:r1 = 5432 */
 	strb	r2, [r0]
 	mov	r3, r1, lsr #24
 	strb	r3, [r0, #0x05]
 	mov	r3, r1, lsr #8		/* r3 = .543 */
 	strh	r3, [r0, #0x03]
 	mov	r3, r2, lsr #8		/* r3 = ...1 */
 	orr	r3, r3, r1, lsl #8	/* r3 = 4321 */
 	strh	r3, [r0, #0x01]
 	RET
 	LMEMCPY_6_PAD
 
 /*
  * 0111: dst is 8-bit aligned, src is 8-bit aligned
  */
 	ldrb	r2, [r1]
 	ldrh	r3, [r1, #0x01]
 	ldrh	ip, [r1, #0x03]
 	ldrb	r1, [r1, #0x05]
 	strb	r2, [r0]
 	strh	r3, [r0, #0x01]
 	strh	ip, [r0, #0x03]
 	strb	r1, [r0, #0x05]
 	RET
 	LMEMCPY_6_PAD
 
 /*
  * 1000: dst is 16-bit aligned, src is 32-bit aligned
  */
 	ldrh	r2, [r1, #0x04]		/* r2 = ..54 */
 	ldr	r3, [r1]		/* r3 = 3210 */
 	mov	r2, r2, lsl #16		/* r2 = 54.. */
 	orr	r2, r2, r3, lsr #16	/* r2 = 5432 */
 	strh	r3, [r0]
 	str	r2, [r0, #0x02]
 	RET
 	LMEMCPY_6_PAD
 
 /*
  * 1001: dst is 16-bit aligned, src is 8-bit aligned
  */
 	ldr	r3, [r1, #-1]		/* BE:r3 = x012  LE:r3 = 210x */
 	ldr	r2, [r1, #3]		/* BE:r2 = 345x  LE:r2 = x543 */
 	mov	r1, r3, lsr #8		/* BE:r1 = .x01  LE:r1 = .210 */
 	mov	r2, r2, lsl #8		/* r2 = 543. */
 	orr	r2, r2, r3, lsr #24	/* r2 = 5432 */
 	strh	r1, [r0]
 	str	r2, [r0, #0x02]
 	RET
 	LMEMCPY_6_PAD
 
 /*
  * 1010: dst is 16-bit aligned, src is 16-bit aligned
  */
 	ldrh	r2, [r1]
 	ldr	r3, [r1, #0x02]
 	strh	r2, [r0]
 	str	r3, [r0, #0x02]
 	RET
 	LMEMCPY_6_PAD
 
 /*
  * 1011: dst is 16-bit aligned, src is 8-bit aligned
  */
 	ldrb	r3, [r1]		/* r3 = ...0 */
 	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234  LE:r2 = 4321 */
 	ldrb	r1, [r1, #0x05]		/* r1 = ...5 */
 	orr	r3, r3, r2, lsl #8	/* r3 = 3210 */
 	mov	r1, r1, lsl #24		/* r1 = 5... */
 	orr	r1, r1, r2, lsr #8	/* r1 = 5432 */
 	strh	r3, [r0]
 	str	r1, [r0, #0x02]
 	RET
 	LMEMCPY_6_PAD
 
 /*
  * 1100: dst is 8-bit aligned, src is 32-bit aligned
  */
 	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
 	ldrh	r1, [r1, #0x04]		/* BE:r1 = ..45  LE:r1 = ..54 */
 	strb	r2, [r0]
 	mov	r2, r2, lsr #8		/* r2 = .321 */
 	orr	r2, r2, r1, lsl #24	/* r2 = 4321 */
 	mov	r1, r1, lsr #8		/* r1 = ...5 */
 	str	r2, [r0, #0x01]
 	strb	r1, [r0, #0x05]
 	RET
 	LMEMCPY_6_PAD
 
 /*
  * 1101: dst is 8-bit aligned, src is 8-bit aligned
  */
 	ldrb	r2, [r1]
 	ldrh	r3, [r1, #0x01]
 	ldrh	ip, [r1, #0x03]
 	ldrb	r1, [r1, #0x05]
 	strb	r2, [r0]
 	strh	r3, [r0, #0x01]
 	strh	ip, [r0, #0x03]
 	strb	r1, [r0, #0x05]
 	RET
 	LMEMCPY_6_PAD
 
 /*
  * 1110: dst is 8-bit aligned, src is 16-bit aligned
  */
 	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
 	ldr	r1, [r1, #0x02]		/* BE:r1 = 2345  LE:r1 = 5432 */
 	strb	r2, [r0]
 	mov	r2, r2, lsr #8		/* r2 = ...1 */
 	orr	r2, r2, r1, lsl #8	/* r2 = 4321 */
 	mov	r1, r1, lsr #24		/* r1 = ...5 */
 	str	r2, [r0, #0x01]
 	strb	r1, [r0, #0x05]
 	RET
 	LMEMCPY_6_PAD
 
 /*
  * 1111: dst is 8-bit aligned, src is 8-bit aligned
  */
 	ldrb	r2, [r1]
 	ldr	r3, [r1, #0x01]
 	ldrb	r1, [r1, #0x05]
 	strb	r2, [r0]
 	str	r3, [r0, #0x01]
 	strb	r1, [r0, #0x05]
 	RET
 	LMEMCPY_6_PAD
 
 
 /******************************************************************************
  * Special case for 8 byte copies
  */
 #define	LMEMCPY_8_LOG2	6	/* 64 bytes */
 #define	LMEMCPY_8_PAD	.align LMEMCPY_8_LOG2
 	LMEMCPY_8_PAD
 .Lmemcpy_8:
 	and	r2, r1, #0x03
 	orr	r2, r2, r0, lsl #2
 	ands	r2, r2, #0x0f
 	sub	r3, pc, #0x14
 	addne	pc, r3, r2, lsl #LMEMCPY_8_LOG2
 
 /*
  * 0000: dst is 32-bit aligned, src is 32-bit aligned
  */
 	ldr	r2, [r1]
 	ldr	r3, [r1, #0x04]
 	str	r2, [r0]
 	str	r3, [r0, #0x04]
 	RET
 	LMEMCPY_8_PAD
 
 /*
  * 0001: dst is 32-bit aligned, src is 8-bit aligned
  */
 	ldr	r3, [r1, #-1]		/* BE:r3 = x012  LE:r3 = 210x */
 	ldr	r2, [r1, #0x03]		/* BE:r2 = 3456  LE:r2 = 6543 */
 	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
 	mov	r3, r3, lsr #8		/* r3 = .210 */
 	orr	r3, r3, r2, lsl #24	/* r3 = 3210 */
 	mov	r1, r1, lsl #24		/* r1 = 7... */
 	orr	r2, r1, r2, lsr #8	/* r2 = 7654 */
 	str	r3, [r0]
 	str	r2, [r0, #0x04]
 	RET
 	LMEMCPY_8_PAD
 
 /*
  * 0010: dst is 32-bit aligned, src is 16-bit aligned
  */
 	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
 	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
 	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67  LE:r1 = ..76 */
 	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
 	mov	r3, r3, lsr #16		/* r3 = ..54 */
 	orr	r3, r3, r1, lsl #16	/* r3 = 7654 */
 	str	r2, [r0]
 	str	r3, [r0, #0x04]
 	RET
 	LMEMCPY_8_PAD
 
 /*
  * 0011: dst is 32-bit aligned, src is 8-bit aligned
  */
 	ldrb	r3, [r1]		/* r3 = ...0 */
 	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234  LE:r2 = 4321 */
 	ldr	r1, [r1, #0x05]		/* BE:r1 = 567x  LE:r1 = x765 */
 	orr	r3, r3, r2, lsl #8	/* r3 = 3210 */
 	mov	r2, r2, lsr #24		/* r2 = ...4 */
 	orr	r2, r2, r1, lsl #8	/* r2 = 7654 */
 	str	r3, [r0]
 	str	r2, [r0, #0x04]
 	RET
 	LMEMCPY_8_PAD
 
 /*
  * 0100: dst is 8-bit aligned, src is 32-bit aligned
  */
 	ldr	r3, [r1]		/* BE:r3 = 0123  LE:r3 = 3210 */
 	ldr	r2, [r1, #0x04]		/* BE:r2 = 4567  LE:r2 = 7654 */
 	strb	r3, [r0]
 	mov	r1, r2, lsr #24		/* r1 = ...7 */
 	strb	r1, [r0, #0x07]
 	mov	r1, r3, lsr #8		/* r1 = .321 */
 	mov	r3, r3, lsr #24		/* r3 = ...3 */
 	orr	r3, r3, r2, lsl #8	/* r3 = 6543 */
 	strh	r1, [r0, #0x01]
 	str	r3, [r0, #0x03]
 	RET
 	LMEMCPY_8_PAD
 
 /*
  * 0101: dst is 8-bit aligned, src is 8-bit aligned
  */
 	ldrb	r2, [r1]
 	ldrh	r3, [r1, #0x01]
 	ldr	ip, [r1, #0x03]
 	ldrb	r1, [r1, #0x07]
 	strb	r2, [r0]
 	strh	r3, [r0, #0x01]
 	str	ip, [r0, #0x03]
 	strb	r1, [r0, #0x07]
 	RET
 	LMEMCPY_8_PAD
 
 /*
  * 0110: dst is 8-bit aligned, src is 16-bit aligned
  */
 	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
 	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
 	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67  LE:r1 = ..76 */
 	strb	r2, [r0]		/* 0 */
 	mov	ip, r1, lsr #8		/* ip = ...7 */
 	strb	ip, [r0, #0x07]		/* 7 */
 	mov	ip, r2, lsr #8		/* ip = ...1 */
 	orr	ip, ip, r3, lsl #8	/* ip = 4321 */
 	mov	r3, r3, lsr #8		/* r3 = .543 */
 	orr	r3, r3, r1, lsl #24	/* r3 = 6543 */
 	strh	ip, [r0, #0x01]
 	str	r3, [r0, #0x03]
 	RET
 	LMEMCPY_8_PAD
 
 /*
  * 0111: dst is 8-bit aligned, src is 8-bit aligned
  */
 	ldrb	r3, [r1]		/* r3 = ...0 */
 	ldr	ip, [r1, #0x01]		/* BE:ip = 1234  LE:ip = 4321 */
 	ldrh	r2, [r1, #0x05]		/* BE:r2 = ..56  LE:r2 = ..65 */
 	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
 	strb	r3, [r0]
 	mov	r3, ip, lsr #16		/* BE:r3 = ..12  LE:r3 = ..43 */
 	strh	ip, [r0, #0x01]
 	orr	r2, r3, r2, lsl #16	/* r2 = 6543 */
 	str	r2, [r0, #0x03]
 	strb	r1, [r0, #0x07]
 	RET
 	LMEMCPY_8_PAD
 
 /*
  * 1000: dst is 16-bit aligned, src is 32-bit aligned
  */
 	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
 	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
 	mov	r1, r2, lsr #16		/* BE:r1 = ..01  LE:r1 = ..32 */
 	strh	r2, [r0]
 	orr	r2, r1, r3, lsl #16	/* r2 = 5432 */
 	mov	r3, r3, lsr #16		/* r3 = ..76 */
 	str	r2, [r0, #0x02]
 	strh	r3, [r0, #0x06]
 	RET
 	LMEMCPY_8_PAD
 
 /*
  * 1001: dst is 16-bit aligned, src is 8-bit aligned
  */
 	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
 	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
 	ldrb	ip, [r1, #0x07]		/* ip = ...7 */
 	mov	r1, r2, lsr #8		/* BE:r1 = .x01  LE:r1 = .210 */
 	strh	r1, [r0]
 	mov	r1, r2, lsr #24		/* r1 = ...2 */
 	orr	r1, r1, r3, lsl #8	/* r1 = 5432 */
 	mov	r3, r3, lsr #24		/* r3 = ...6 */
 	orr	r3, r3, ip, lsl #8	/* r3 = ..76 */
 	str	r1, [r0, #0x02]
 	strh	r3, [r0, #0x06]
 	RET
 	LMEMCPY_8_PAD
 
 /*
  * 1010: dst is 16-bit aligned, src is 16-bit aligned
  */
 	ldrh	r2, [r1]
 	ldr	ip, [r1, #0x02]
 	ldrh	r3, [r1, #0x06]
 	strh	r2, [r0]
 	str	ip, [r0, #0x02]
 	strh	r3, [r0, #0x06]
 	RET
 	LMEMCPY_8_PAD
 
 /*
  * 1011: dst is 16-bit aligned, src is 8-bit aligned
  */
 	ldr	r3, [r1, #0x05]		/* BE:r3 = 567x  LE:r3 = x765 */
 	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234  LE:r2 = 4321 */
 	ldrb	ip, [r1]		/* ip = ...0 */
 	mov	r1, r3, lsr #8		/* BE:r1 = .567  LE:r1 = .x76 */
 	strh	r1, [r0, #0x06]
 	mov	r3, r3, lsl #24		/* r3 = 5... */
 	orr	r3, r3, r2, lsr #8	/* r3 = 5432 */
 	orr	r2, ip, r2, lsl #8	/* r2 = 3210 */
 	str	r3, [r0, #0x02]
 	strh	r2, [r0]
 	RET
 	LMEMCPY_8_PAD
 
 /*
  * 1100: dst is 8-bit aligned, src is 32-bit aligned
  */
 	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
 	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
 	mov	r1, r3, lsr #8		/* BE:r1 = .456  LE:r1 = .765 */
 	strh	r1, [r0, #0x05]
 	strb	r2, [r0]
 	mov	r1, r3, lsr #24		/* r1 = ...7 */
 	strb	r1, [r0, #0x07]
 	mov	r2, r2, lsr #8		/* r2 = .321 */
 	orr	r2, r2, r3, lsl #24	/* r2 = 4321 */
 	str	r2, [r0, #0x01]
 	RET
 	LMEMCPY_8_PAD
 
 /*
  * 1101: dst is 8-bit aligned, src is 8-bit aligned
  */
 	ldrb	r3, [r1]		/* r3 = ...0 */
 	ldrh	r2, [r1, #0x01]		/* BE:r2 = ..12  LE:r2 = ..21 */
 	ldr	ip, [r1, #0x03]		/* BE:ip = 3456  LE:ip = 6543 */
 	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
 	strb	r3, [r0]
 	mov	r3, ip, lsr #16		/* BE:r3 = ..34  LE:r3 = ..65 */
 	strh	r3, [r0, #0x05]
 	orr	r2, r2, ip, lsl #16	/* r2 = 4321 */
 	str	r2, [r0, #0x01]
 	strb	r1, [r0, #0x07]
 	RET
 	LMEMCPY_8_PAD
 
 /*
  * 1110: dst is 8-bit aligned, src is 16-bit aligned
  */
 	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
 	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
 	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67  LE:r1 = ..76 */
 	strb	r2, [r0]		/* dst byte 0 (low byte of r2) */
 	mov	ip, r2, lsr #8		/* ip = ...1 */
 	orr	ip, ip, r3, lsl #8	/* ip = 4321 */
 	mov	r2, r1, lsr #8		/* r2 = ...7 */
 	strb	r2, [r0, #0x07]		/* dst byte 7 */
 	mov	r1, r1, lsl #8		/* r1 = .76. */
 	orr	r1, r1, r3, lsr #24	/* r1 = .765 */
 	str	ip, [r0, #0x01]		/* dst bytes 1-4 */
 	strh	r1, [r0, #0x05]		/* dst bytes 5-6 (low halfword: 65) */
 	RET
 	LMEMCPY_8_PAD
 
 /*
  * 1111: dst is 8-bit aligned, src is 8-bit aligned (plain 1+4+2+1 byte copy)
  */
 	ldrb	r2, [r1]		/* r2 = src byte 0 */
 	ldr	ip, [r1, #0x01]		/* ip = src bytes 1-4 */
 	ldrh	r3, [r1, #0x05]		/* r3 = src bytes 5-6 */
 	ldrb	r1, [r1, #0x07]		/* r1 = src byte 7 (src pointer no longer needed) */
 	strb	r2, [r0]		/* dst byte 0 */
 	str	ip, [r0, #0x01]		/* dst bytes 1-4 */
 	strh	r3, [r0, #0x05]		/* dst bytes 5-6 */
 	strb	r1, [r0, #0x07]		/* dst byte 7 */
 	RET
 	LMEMCPY_8_PAD
 
 /******************************************************************************
  * Special case for 12 byte copies (diagrams below: MSB..LSB, LE unless marked BE:)
  */
 #define	LMEMCPY_C_LOG2	7	/* log2 of the per-case slot size: 128 bytes */
 #define	LMEMCPY_C_PAD	.align LMEMCPY_C_LOG2	/* pad to the next slot boundary */
 	LMEMCPY_C_PAD			/* slot 0 (this dispatch + case 0000) starts aligned */
 .Lmemcpy_c:
 	and	r2, r1, #0x03		/* r2 = src & 3 */
 	orr	r2, r2, r0, lsl #2	/* r2 |= dst << 2 */
 	ands	r2, r2, #0x0f		/* r2 = (dst & 3) << 2 | (src & 3); Z set when 0000 */
 	sub	r3, pc, #0x14		/* r3 = .Lmemcpy_c: ARM-state pc reads insn + 8 = label + 0x14 */
 	addne	pc, r3, r2, lsl #LMEMCPY_C_LOG2	/* index != 0: jump to its slot; else fall into 0000 */
 
 /*
  * 0000: dst is 32-bit aligned, src is 32-bit aligned (three word copies)
  */
 	ldr	r2, [r1]		/* r2 = src bytes 0-3 */
 	ldr	r3, [r1, #0x04]		/* r3 = src bytes 4-7 */
 	ldr	r1, [r1, #0x08]		/* r1 = src bytes 8-B (src pointer no longer needed) */
 	str	r2, [r0]		/* dst bytes 0-3 */
 	str	r3, [r0, #0x04]		/* dst bytes 4-7 */
 	str	r1, [r0, #0x08]		/* dst bytes 8-B */
 	RET
 	LMEMCPY_C_PAD
 
 /*
  * 0001: dst is 32-bit aligned, src is 8-bit aligned (byte 1)
  */
 	ldrb	r2, [r1, #0xb]		/* r2 = ...B */
 	ldr	ip, [r1, #0x07]		/* BE:ip = 789A  LE:ip = A987 */
 	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
 	ldr	r1, [r1, #-1]		/* BE:r1 = x012  LE:r1 = 210x */
 	mov	r2, r2, lsl #24		/* r2 = B... */
 	orr	r2, r2, ip, lsr #8	/* r2 = BA98 */
 	str	r2, [r0, #0x08]		/* dst bytes 8-B */
 	mov	r2, ip, lsl #24		/* r2 = 7... */
 	orr	r2, r2, r3, lsr #8	/* r2 = 7654 */
 	mov	r1, r1, lsr #8		/* r1 = .210 (x, the byte before src, shifted out) */
 	orr	r1, r1, r3, lsl #24	/* r1 = 3210 */
 	str	r2, [r0, #0x04]		/* dst bytes 4-7 */
 	str	r1, [r0]		/* dst bytes 0-3 */
 	RET
 	LMEMCPY_C_PAD
 
 /*
  * 0010: dst is 32-bit aligned, src is 16-bit aligned
  */
 	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
 	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
 	ldr	ip, [r1, #0x06]		/* BE:ip = 6789  LE:ip = 9876 */
 	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB  LE:r1 = ..BA */
 	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
 	str	r2, [r0]		/* dst bytes 0-3 */
 	mov	r3, r3, lsr #16		/* r3 = ..54 */
 	orr	r3, r3, ip, lsl #16	/* r3 = 7654 */
 	mov	r1, r1, lsl #16		/* r1 = BA.. */
 	orr	r1, r1, ip, lsr #16	/* r1 = BA98 */
 	str	r3, [r0, #0x04]		/* dst bytes 4-7 */
 	str	r1, [r0, #0x08]		/* dst bytes 8-B */
 	RET
 	LMEMCPY_C_PAD
 
 /*
  * 0011: dst is 32-bit aligned, src is 8-bit aligned (byte 3)
  */
 	ldrb	r2, [r1]		/* r2 = ...0 */
 	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234  LE:r3 = 4321 */
 	ldr	ip, [r1, #0x05]		/* BE:ip = 5678  LE:ip = 8765 */
 	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx  LE:r1 = xBA9 */
 	orr	r2, r2, r3, lsl #8	/* r2 = 3210 */
 	str	r2, [r0]		/* dst bytes 0-3 */
 	mov	r3, r3, lsr #24		/* r3 = ...4 */
 	orr	r3, r3, ip, lsl #8	/* r3 = 7654 */
 	mov	r1, r1, lsl #8		/* r1 = BA9. (x, the byte after src+B, shifted out) */
 	orr	r1, r1, ip, lsr #24	/* r1 = BA98 */
 	str	r3, [r0, #0x04]		/* dst bytes 4-7 */
 	str	r1, [r0, #0x08]		/* dst bytes 8-B */
 	RET
 	LMEMCPY_C_PAD
 
 /*
  * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
  */
 	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
 	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
 	ldr	ip, [r1, #0x08]		/* BE:ip = 89AB  LE:ip = BA98 */
 	mov	r1, r2, lsr #8		/* BE:r1 = .012  LE:r1 = .321 */
 	strh	r1, [r0, #0x01]		/* dst bytes 1-2 (low halfword: 21) */
 	strb	r2, [r0]		/* dst byte 0 (low byte of r2) */
 	mov	r1, r2, lsr #24		/* r1 = ...3 */
 	orr	r2, r1, r3, lsl #8	/* r2 = 6543 */
 	mov	r1, r3, lsr #24		/* r1 = ...7 */
 	orr	r1, r1, ip, lsl #8	/* r1 = A987 */
 	mov	ip, ip, lsr #24		/* ip = ...B */
 	str	r2, [r0, #0x03]		/* dst bytes 3-6 */
 	str	r1, [r0, #0x07]		/* dst bytes 7-A */
 	strb	ip, [r0, #0x0b]		/* dst byte B */
 	RET
 	LMEMCPY_C_PAD
 
 /*
  * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
  */
 	ldrb	r2, [r1]		/* r2 = src byte 0 */
 	ldrh	r3, [r1, #0x01]		/* r3 = src bytes 1-2 */
 	ldr	ip, [r1, #0x03]		/* ip = src bytes 3-6 */
 	strb	r2, [r0]		/* dst byte 0; r2 is reloaded on the next line */
 	ldr	r2, [r1, #0x07]		/* r2 = src bytes 7-A */
 	ldrb	r1, [r1, #0x0b]		/* r1 = src byte B (src pointer no longer needed) */
 	strh	r3, [r0, #0x01]		/* dst bytes 1-2 */
 	str	ip, [r0, #0x03]		/* dst bytes 3-6 */
 	str	r2, [r0, #0x07]		/* dst bytes 7-A */
 	strb	r1, [r0, #0x0b]		/* dst byte B */
 	RET
 	LMEMCPY_C_PAD
 
 /*
  * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
  */
 	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
 	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
 	ldr	ip, [r1, #0x06]		/* BE:ip = 6789  LE:ip = 9876 */
 	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB  LE:r1 = ..BA */
 	strb	r2, [r0]		/* dst byte 0 (low byte of r2) */
 	mov	r2, r2, lsr #8		/* r2 = ...1 */
 	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
 	strh	r2, [r0, #0x01]		/* dst bytes 1-2 (low halfword: 21) */
 	mov	r2, r3, lsr #8		/* r2 = .543 */
 	orr	r3, r2, ip, lsl #24	/* r3 = 6543 */
 	mov	r2, ip, lsr #8		/* r2 = .987 */
 	orr	r2, r2, r1, lsl #24	/* r2 = A987 */
 	mov	r1, r1, lsr #8		/* r1 = ...B */
 	str	r3, [r0, #0x03]		/* dst bytes 3-6 */
 	str	r2, [r0, #0x07]		/* dst bytes 7-A */
 	strb	r1, [r0, #0x0b]		/* dst byte B */
 	RET
 	LMEMCPY_C_PAD
 
 /*
  * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
  */
 	ldrb	r2, [r1]		/* r2 = ...0 */
 	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234  LE:r3 = 4321 */
 	ldr	ip, [r1, #0x05]		/* BE:ip = 5678  LE:ip = 8765 */
 	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx  LE:r1 = xBA9 */
 	strb	r2, [r0]		/* dst byte 0 */
 	strh	r3, [r0, #0x01]		/* dst bytes 1-2 (low halfword: 21) */
 	mov	r3, r3, lsr #16		/* r3 = ..43 */
 	orr	r3, r3, ip, lsl #16	/* r3 = 6543 */
 	mov	ip, ip, lsr #16		/* ip = ..87 */
 	orr	ip, ip, r1, lsl #16	/* ip = A987 */
 	mov	r1, r1, lsr #16		/* r1 = ..xB */
 	str	r3, [r0, #0x03]		/* dst bytes 3-6 */
 	str	ip, [r0, #0x07]		/* dst bytes 7-A */
 	strb	r1, [r0, #0x0b]		/* dst byte B (low byte only; x is not stored) */
 	RET
 	LMEMCPY_C_PAD
 
 /*
  * 1000: dst is 16-bit aligned, src is 32-bit aligned
  */
 	ldr	ip, [r1]		/* BE:ip = 0123  LE:ip = 3210 */
 	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
 	ldr	r2, [r1, #0x08]		/* BE:r2 = 89AB  LE:r2 = BA98 */
 	mov	r1, ip, lsr #16		/* BE:r1 = ..01  LE:r1 = ..32 */
 	strh	ip, [r0]		/* dst bytes 0-1 (low halfword: 10) */
 	orr	r1, r1, r3, lsl #16	/* r1 = 5432 */
 	mov	r3, r3, lsr #16		/* r3 = ..76 */
 	orr	r3, r3, r2, lsl #16	/* r3 = 9876 */
 	mov	r2, r2, lsr #16		/* r2 = ..BA */
 	str	r1, [r0, #0x02]		/* dst bytes 2-5 */
 	str	r3, [r0, #0x06]		/* dst bytes 6-9 */
 	strh	r2, [r0, #0x0a]		/* dst bytes A-B */
 	RET
 	LMEMCPY_C_PAD
 
 /*
  * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
  */
 	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
 	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
 	mov	ip, r2, lsr #8		/* BE:ip = .x01  LE:ip = .210 */
 	strh	ip, [r0]		/* dst bytes 0-1 (low halfword: 10) */
 	ldr	ip, [r1, #0x07]		/* BE:ip = 789A  LE:ip = A987 */
 	ldrb	r1, [r1, #0x0b]		/* r1 = ...B */
 	mov	r2, r2, lsr #24		/* r2 = ...2 */
 	orr	r2, r2, r3, lsl #8	/* r2 = 5432 */
 	mov	r3, r3, lsr #24		/* r3 = ...6 */
 	orr	r3, r3, ip, lsl #8	/* r3 = 9876 */
 	mov	r1, r1, lsl #8		/* r1 = ..B. */
 	orr	r1, r1, ip, lsr #24	/* r1 = ..BA */
 	str	r2, [r0, #0x02]		/* dst bytes 2-5 */
 	str	r3, [r0, #0x06]		/* dst bytes 6-9 */
 	strh	r1, [r0, #0x0a]		/* dst bytes A-B */
 	RET
 	LMEMCPY_C_PAD
 
 /*
  * 1010: dst is 16-bit aligned, src is 16-bit aligned (plain 2+4+4+2 byte copy)
  */
 	ldrh	r2, [r1]		/* r2 = src bytes 0-1 */
 	ldr	r3, [r1, #0x02]		/* r3 = src bytes 2-5 */
 	ldr	ip, [r1, #0x06]		/* ip = src bytes 6-9 */
 	ldrh	r1, [r1, #0x0a]		/* r1 = src bytes A-B (src pointer no longer needed) */
 	strh	r2, [r0]		/* dst bytes 0-1 */
 	str	r3, [r0, #0x02]		/* dst bytes 2-5 */
 	str	ip, [r0, #0x06]		/* dst bytes 6-9 */
 	strh	r1, [r0, #0x0a]		/* dst bytes A-B */
 	RET
 	LMEMCPY_C_PAD
 
 /*
  * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
  */
 	ldr	r2, [r1, #0x09]		/* BE:r2 = 9ABx  LE:r2 = xBA9 */
 	ldr	r3, [r1, #0x05]		/* BE:r3 = 5678  LE:r3 = 8765 */
 	mov	ip, r2, lsr #8		/* BE:ip = .9AB  LE:ip = .xBA */
 	strh	ip, [r0, #0x0a]		/* dst bytes A-B (low halfword: BA) */
 	ldr	ip, [r1, #0x01]		/* BE:ip = 1234  LE:ip = 4321 */
 	ldrb	r1, [r1]		/* r1 = ...0 */
 	mov	r2, r2, lsl #24		/* r2 = 9... */
 	orr	r2, r2, r3, lsr #8	/* r2 = 9876 */
 	mov	r3, r3, lsl #24		/* r3 = 5... */
 	orr	r3, r3, ip, lsr #8	/* r3 = 5432 */
 	orr	r1, r1, ip, lsl #8	/* r1 = 3210 */
 	str	r2, [r0, #0x06]		/* dst bytes 6-9 */
 	str	r3, [r0, #0x02]		/* dst bytes 2-5 */
 	strh	r1, [r0]		/* dst bytes 0-1 (low halfword: 10) */
 	RET
 	LMEMCPY_C_PAD
 
 /*
  * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
  */
 	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
 	ldr	ip, [r1, #0x04]		/* BE:ip = 4567  LE:ip = 7654 */
 	ldr	r1, [r1, #0x08]		/* BE:r1 = 89AB  LE:r1 = BA98 */
 	strb	r2, [r0]		/* dst byte 0 (low byte of r2) */
 	mov	r3, r2, lsr #8		/* r3 = .321 */
 	orr	r3, r3, ip, lsl #24	/* r3 = 4321 */
 	str	r3, [r0, #0x01]		/* dst bytes 1-4 */
 	mov	r3, ip, lsr #8		/* r3 = .765 */
 	orr	r3, r3, r1, lsl #24	/* r3 = 8765 */
 	str	r3, [r0, #0x05]		/* dst bytes 5-8 */
 	mov	r1, r1, lsr #8		/* r1 = .BA9 */
 	strh	r1, [r0, #0x09]		/* dst bytes 9-A (low halfword: A9) */
 	mov	r1, r1, lsr #16		/* r1 = ...B */
 	strb	r1, [r0, #0x0b]		/* dst byte B */
 	RET
 	LMEMCPY_C_PAD
 
 /*
  * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
  */
 	ldrb	r2, [r1, #0x0b]		/* r2 = ...B */
 	ldr	r3, [r1, #0x07]		/* BE:r3 = 789A  LE:r3 = A987 */
 	ldr	ip, [r1, #0x03]		/* BE:ip = 3456  LE:ip = 6543 */
 	ldr	r1, [r1, #-1]		/* BE:r1 = x012  LE:r1 = 210x */
 	strb	r2, [r0, #0x0b]		/* dst byte B */
 	mov	r2, r3, lsr #16		/* r2 = ..A9 */
 	strh	r2, [r0, #0x09]		/* dst bytes 9-A */
 	mov	r3, r3, lsl #16		/* r3 = 87.. */
 	orr	r3, r3, ip, lsr #16	/* r3 = 8765 */
 	mov	ip, ip, lsl #16		/* ip = 43.. */
 	orr	ip, ip, r1, lsr #16	/* ip = 4321 */
 	mov	r1, r1, lsr #8		/* r1 = .210 */
 	str	r3, [r0, #0x05]		/* dst bytes 5-8 */
 	str	ip, [r0, #0x01]		/* dst bytes 1-4 */
 	strb	r1, [r0]		/* dst byte 0 (low byte of r1) */
 	RET
 	LMEMCPY_C_PAD
 
 /*
  * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
  */
 	ldrh	r2, [r1]		/* r2 = ..10 */
 	ldr	r3, [r1, #0x02]		/* r3 = 5432 */
 	ldr	ip, [r1, #0x06]		/* ip = 9876 */
 	ldrh	r1, [r1, #0x0a]		/* r1 = ..BA */
 	strb	r2, [r0]		/* dst byte 0 (low byte of r2) */
 	mov	r2, r2, lsr #8		/* r2 = ...1 */
 	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
 	mov	r3, r3, lsr #24		/* r3 = ...5 */
 	orr	r3, r3, ip, lsl #8	/* r3 = 8765 */
 	mov	ip, ip, lsr #24		/* ip = ...9 */
 	orr	ip, ip, r1, lsl #8	/* ip = .BA9 */
 	mov	r1, r1, lsr #8		/* r1 = ...B */
 	str	r2, [r0, #0x01]		/* dst bytes 1-4 */
 	str	r3, [r0, #0x05]		/* dst bytes 5-8 */
 	strh	ip, [r0, #0x09]		/* dst bytes 9-A (low halfword: A9) */
 	strb	r1, [r0, #0x0b]		/* dst byte B */
 	RET
 	LMEMCPY_C_PAD
 
 /*
  * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
  */
 	ldrb	r2, [r1]		/* r2 = src byte 0 */
 	ldr	r3, [r1, #0x01]		/* r3 = src bytes 1-4 */
 	ldr	ip, [r1, #0x05]		/* ip = src bytes 5-8 */
 	strb	r2, [r0]		/* dst byte 0; r2 is reloaded on the next line */
 	ldrh	r2, [r1, #0x09]		/* r2 = src bytes 9-A */
 	ldrb	r1, [r1, #0x0b]		/* r1 = src byte B (src pointer no longer needed) */
 	str	r3, [r0, #0x01]		/* dst bytes 1-4 */
 	str	ip, [r0, #0x05]		/* dst bytes 5-8 */
 	strh	r2, [r0, #0x09]		/* dst bytes 9-A */
 	strb	r1, [r0, #0x0b]		/* dst byte B */
 	RET
 END(memcpy)
diff --git a/sys/arm/include/md_var.h b/sys/arm/include/md_var.h
index d60992f104e1..19468bd30e02 100644
--- a/sys/arm/include/md_var.h
+++ b/sys/arm/include/md_var.h
@@ -1,71 +1,61 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1995 Bruce D. Evans.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the author nor the names of contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: FreeBSD: src/sys/i386/include/md_var.h,v 1.40 2001/07/12
  * $FreeBSD$
  */
 
 #ifndef	_MACHINE_MD_VAR_H_
 #define	_MACHINE_MD_VAR_H_
 
 extern long Maxmem;
 extern char sigcode[];
 extern int szsigcode;
 extern u_long elf_hwcap;
 extern u_long elf_hwcap2;
 extern vm_paddr_t arm_physmem_kernaddr;
 
-extern int (*_arm_memcpy)(void *, void *, int, int);
-extern int (*_arm_bzero)(void *, int, int);
-
-extern int _min_memcpy_size;
-extern int _min_bzero_size;
-
-#define DST_IS_USER	0x1
-#define SRC_IS_USER	0x2
-#define IS_PHYSICAL	0x4
-
 enum cpu_class {		/* type of the cpu_class variable declared after this enum */
 	CPU_CLASS_NONE,		/* first enumerator, so its value is 0 */
 	CPU_CLASS_CORTEXA,
 	CPU_CLASS_KRAIT,
 	CPU_CLASS_ARM11J,
 	CPU_CLASS_MARVELL
 };
 extern enum cpu_class cpu_class;	/* declaration only; the definition is not in this header */
 
 struct dumperinfo;
 extern int busdma_swi_pending;
 void busdma_swi(void);
 int minidumpsys(struct dumperinfo *);
 
 extern uint32_t initial_fpscr;
 
 #endif /* !_MACHINE_MD_VAR_H_ */