Index: head/sys/arm/arm/bcopyinout_xscale.S =================================================================== --- head/sys/arm/arm/bcopyinout_xscale.S (revision 368152) +++ head/sys/arm/arm/bcopyinout_xscale.S (revision 368153) @@ -1,951 +1,819 @@ /* $NetBSD: bcopyinout_xscale.S,v 1.3 2003/12/15 09:27:18 scw Exp $ */ /*- * Copyright 2003 Wasabi Systems, Inc. * All rights reserved. * * Written by Steve C. Woodford for Wasabi Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed for the NetBSD Project by * Wasabi Systems, Inc. * 4. The name of Wasabi Systems, Inc. may not be used to endorse * or promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); .syntax unified .text .align 2 #define GET_PCB(tmp) \ mrc p15, 0, tmp, c13, c0, 4; \ add tmp, tmp, #(TD_PCB) /* * r0 = user space address * r1 = kernel space address * r2 = length * * Copies bytes from user space to kernel space */ ENTRY(copyin) cmp r2, #0x00 movle r0, #0x00 movle pc, lr /* Bail early if length is <= 0 */ adds r3, r0, r2 movcs r0, #EFAULT RETc(cs) ldr r12, =(VM_MAXUSER_ADDRESS + 1) cmp r3, r12 movcs r0, #EFAULT RETc(cs) ldr r3, .L_arm_memcpy ldr r3, [r3] cmp r3, #0 beq .Lnormal ldr r3, .L_min_memcpy_size ldr r3, [r3] cmp r2, r3 blt .Lnormal stmfd sp!, {r0-r2, r4, lr} mov r3, r0 mov r0, r1 mov r1, r3 mov r3, #2 /* SRC_IS_USER */ ldr r4, .L_arm_memcpy mov lr, pc ldr pc, [r4] cmp r0, #0 ldmfd sp!, {r0-r2, r4, lr} moveq r0, #0 RETeq .Lnormal: stmfd sp!, {r10-r11, lr} GET_PCB(r10) ldr r10, [r10] mov r3, #0x00 adr ip, .Lcopyin_fault ldr r11, [r10, #PCB_ONFAULT] str ip, [r10, #PCB_ONFAULT] bl .Lcopyin_guts str r11, [r10, #PCB_ONFAULT] mov r0, #0x00 ldmfd sp!, {r10-r11, pc} .Lcopyin_fault: ldr r0, =EFAULT str r11, [r10, #PCB_ONFAULT] cmp r3, #0x00 ldmfdgt sp!, {r4-r7} /* r3 > 0 Restore r4-r7 */ ldmfdlt sp!, {r4-r9} /* r3 < 0 Restore r4-r9 */ ldmfd sp!, {r10-r11, pc} .Lcopyin_guts: pld [r0] /* Word-align the destination buffer */ ands ip, r1, #0x03 /* Already word aligned? */ beq .Lcopyin_wordaligned /* Yup */ rsb ip, ip, #0x04 cmp r2, ip /* Enough bytes left to align it? */ blt .Lcopyin_l4_2 /* Nope. Just copy bytewise */ sub r2, r2, ip rsbs ip, ip, #0x03 addne pc, pc, ip, lsl #3 nop ldrbt ip, [r0], #0x01 strb ip, [r1], #0x01 ldrbt ip, [r0], #0x01 strb ip, [r1], #0x01 ldrbt ip, [r0], #0x01 strb ip, [r1], #0x01 cmp r2, #0x00 /* All done? */ RETeq /* Destination buffer is now word aligned */ .Lcopyin_wordaligned: ands ip, r0, #0x03 /* Is src also word-aligned? */ bne .Lcopyin_bad_align /* Nope. Things just got bad */ cmp r2, #0x08 /* Less than 8 bytes remaining? */ blt .Lcopyin_w_less_than8 /* Quad-align the destination buffer */ tst r1, #0x07 /* Already quad aligned? */ ldrtne ip, [r0], #0x04 strne ip, [r1], #0x04 subne r2, r2, #0x04 stmfd sp!, {r4-r9} /* Free up some registers */ mov r3, #-1 /* Signal restore r4-r9 */ /* Destination buffer quad aligned, source is word aligned */ subs r2, r2, #0x80 blt .Lcopyin_w_lessthan128 /* Copy 128 bytes at a time */ .Lcopyin_w_loop128: ldrt r4, [r0], #0x04 /* LD:00-03 */ ldrt r5, [r0], #0x04 /* LD:04-07 */ pld [r0, #0x18] /* Prefetch 0x20 */ ldrt r6, [r0], #0x04 /* LD:08-0b */ ldrt r7, [r0], #0x04 /* LD:0c-0f */ ldrt r8, [r0], #0x04 /* LD:10-13 */ ldrt r9, [r0], #0x04 /* LD:14-17 */ strd r4, [r1], #0x08 /* ST:00-07 */ ldrt r4, [r0], #0x04 /* LD:18-1b */ ldrt r5, [r0], #0x04 /* LD:1c-1f */ strd r6, [r1], #0x08 /* ST:08-0f */ ldrt r6, [r0], #0x04 /* LD:20-23 */ ldrt r7, [r0], #0x04 /* LD:24-27 */ pld [r0, #0x18] /* Prefetch 0x40 */ strd r8, [r1], #0x08 /* ST:10-17 */ ldrt r8, [r0], #0x04 /* LD:28-2b */ ldrt r9, [r0], #0x04 /* LD:2c-2f */ strd r4, [r1], #0x08 /* ST:18-1f */ ldrt r4, [r0], #0x04 /* LD:30-33 */ ldrt r5, [r0], #0x04 /* LD:34-37 */ strd r6, [r1], #0x08 /* ST:20-27 */ ldrt r6, [r0], #0x04 /* LD:38-3b */ ldrt r7, [r0], #0x04 /* LD:3c-3f */ strd r8, [r1], #0x08 /* ST:28-2f */ ldrt r8, [r0], #0x04 /* LD:40-43 */ ldrt r9, [r0], #0x04 /* LD:44-47 */ pld [r0, #0x18] /* Prefetch 0x60 */ strd r4, [r1], #0x08 /* ST:30-37 */ ldrt r4, [r0], #0x04 /* LD:48-4b */ ldrt r5, [r0], #0x04 /* LD:4c-4f */ strd r6, [r1], #0x08 /* ST:38-3f */ ldrt r6, [r0], #0x04 /* LD:50-53 */ ldrt r7, [r0], #0x04 /* LD:54-57 */ strd r8, [r1], #0x08 /* ST:40-47 */ ldrt r8, [r0], #0x04 /* LD:58-5b */ ldrt r9, [r0], #0x04 /* LD:5c-5f */ strd r4, [r1], #0x08 /* ST:48-4f */ ldrt r4, [r0], #0x04 /* LD:60-63 */ ldrt r5, [r0], #0x04 /* LD:64-67 */ pld [r0, #0x18] /* Prefetch 0x80 */ strd r6, [r1], #0x08 /* ST:50-57 */ ldrt r6, [r0], #0x04 /* LD:68-6b */ ldrt r7, [r0], #0x04 /* LD:6c-6f */ strd r8, [r1], #0x08 /* ST:58-5f */ ldrt r8, [r0], #0x04 /* LD:70-73 */ ldrt r9, [r0], #0x04 /* LD:74-77 */ strd r4, [r1], #0x08 /* ST:60-67 */ ldrt r4, [r0], #0x04 /* LD:78-7b */ ldrt r5, [r0], #0x04 /* LD:7c-7f */ strd r6, [r1], #0x08 /* ST:68-6f */ strd r8, [r1], #0x08 /* ST:70-77 */ subs r2, r2, #0x80 strd r4, [r1], #0x08 /* ST:78-7f */ bge .Lcopyin_w_loop128 .Lcopyin_w_lessthan128: adds r2, r2, #0x80 /* Adjust for extra sub */ ldmfdeq sp!, {r4-r9} RETeq subs r2, r2, #0x20 blt .Lcopyin_w_lessthan32 /* Copy 32 bytes at a time */ .Lcopyin_w_loop32: ldrt r4, [r0], #0x04 ldrt r5, [r0], #0x04 pld [r0, #0x18] ldrt r6, [r0], #0x04 ldrt r7, [r0], #0x04 ldrt r8, [r0], #0x04 ldrt r9, [r0], #0x04 strd r4, [r1], #0x08 ldrt r4, [r0], #0x04 ldrt r5, [r0], #0x04 strd r6, [r1], #0x08 strd r8, [r1], #0x08 subs r2, r2, #0x20 strd r4, [r1], #0x08 bge .Lcopyin_w_loop32 .Lcopyin_w_lessthan32: adds r2, r2, #0x20 /* Adjust for extra sub */ ldmfdeq sp!, {r4-r9} RETeq /* Return now if done */ and r4, r2, #0x18 rsb r5, r4, #0x18 subs r2, r2, r4 add pc, pc, r5, lsl #1 nop /* At least 24 bytes remaining */ ldrt r4, [r0], #0x04 ldrt r5, [r0], #0x04 nop strd r4, [r1], #0x08 /* At least 16 bytes remaining */ ldrt r4, [r0], #0x04 ldrt r5, [r0], #0x04 nop strd r4, [r1], #0x08 /* At least 8 bytes remaining */ ldrt r4, [r0], #0x04 ldrt r5, [r0], #0x04 nop strd r4, [r1], #0x08 /* Less than 8 bytes remaining */ ldmfd sp!, {r4-r9} RETeq /* Return now if done */ mov r3, #0x00 .Lcopyin_w_less_than8: subs r2, r2, #0x04 ldrtge ip, [r0], #0x04 strge ip, [r1], #0x04 RETeq /* Return now if done */ addlt r2, r2, #0x04 ldrbt ip, [r0], #0x01 cmp r2, #0x02 ldrbtge r2, [r0], #0x01 strb ip, [r1], #0x01 ldrbtgt ip, [r0] strbge r2, [r1], #0x01 strbgt ip, [r1] RET /* * At this point, it has not been possible to word align both buffers. * The destination buffer (r1) is word aligned, but the source buffer * (r0) is not. */ .Lcopyin_bad_align: stmfd sp!, {r4-r7} mov r3, #0x01 bic r0, r0, #0x03 cmp ip, #2 ldrt ip, [r0], #0x04 bgt .Lcopyin_bad3 beq .Lcopyin_bad2 b .Lcopyin_bad1 .Lcopyin_bad1_loop16: -#ifdef __ARMEB__ - mov r4, ip, lsl #8 -#else mov r4, ip, lsr #8 -#endif ldrt r5, [r0], #0x04 pld [r0, #0x018] ldrt r6, [r0], #0x04 ldrt r7, [r0], #0x04 ldrt ip, [r0], #0x04 -#ifdef __ARMEB__ - orr r4, r4, r5, lsr #24 - mov r5, r5, lsl #8 - orr r5, r5, r6, lsr #24 - mov r6, r6, lsl #8 - orr r6, r6, r7, lsr #24 - mov r7, r7, lsl #8 - orr r7, r7, ip, lsr #24 -#else orr r4, r4, r5, lsl #24 mov r5, r5, lsr #8 orr r5, r5, r6, lsl #24 mov r6, r6, lsr #8 orr r6, r6, r7, lsl #24 mov r7, r7, lsr #8 orr r7, r7, ip, lsl #24 -#endif str r4, [r1], #0x04 str r5, [r1], #0x04 str r6, [r1], #0x04 str r7, [r1], #0x04 .Lcopyin_bad1: subs r2, r2, #0x10 bge .Lcopyin_bad1_loop16 adds r2, r2, #0x10 ldmfdeq sp!, {r4-r7} RETeq /* Return now if done */ subs r2, r2, #0x04 sublt r0, r0, #0x03 blt .Lcopyin_l4 .Lcopyin_bad1_loop4: -#ifdef __ARMEB__ - mov r4, ip, lsl #8 -#else mov r4, ip, lsr #8 -#endif ldrt ip, [r0], #0x04 subs r2, r2, #0x04 -#ifdef __ARMEB__ - orr r4, r4, ip, lsr #24 -#else orr r4, r4, ip, lsl #24 -#endif str r4, [r1], #0x04 bge .Lcopyin_bad1_loop4 sub r0, r0, #0x03 b .Lcopyin_l4 .Lcopyin_bad2_loop16: -#ifdef __ARMEB__ - mov r4, ip, lsl #16 -#else mov r4, ip, lsr #16 -#endif ldrt r5, [r0], #0x04 pld [r0, #0x018] ldrt r6, [r0], #0x04 ldrt r7, [r0], #0x04 ldrt ip, [r0], #0x04 -#ifdef __ARMEB__ - orr r4, r4, r5, lsr #16 - mov r5, r5, lsl #16 - orr r5, r5, r6, lsr #16 - mov r6, r6, lsl #16 - orr r6, r6, r7, lsr #16 - mov r7, r7, lsl #16 - orr r7, r7, ip, lsr #16 -#else orr r4, r4, r5, lsl #16 mov r5, r5, lsr #16 orr r5, r5, r6, lsl #16 mov r6, r6, lsr #16 orr r6, r6, r7, lsl #16 mov r7, r7, lsr #16 orr r7, r7, ip, lsl #16 -#endif str r4, [r1], #0x04 str r5, [r1], #0x04 str r6, [r1], #0x04 str r7, [r1], #0x04 .Lcopyin_bad2: subs r2, r2, #0x10 bge .Lcopyin_bad2_loop16 adds r2, r2, #0x10 ldmfdeq sp!, {r4-r7} RETeq /* Return now if done */ subs r2, r2, #0x04 sublt r0, r0, #0x02 blt .Lcopyin_l4 .Lcopyin_bad2_loop4: -#ifdef __ARMEB__ - mov r4, ip, lsl #16 -#else mov r4, ip, lsr #16 -#endif ldrt ip, [r0], #0x04 subs r2, r2, #0x04 -#ifdef __ARMEB__ - orr r4, r4, ip, lsr #16 -#else orr r4, r4, ip, lsl #16 -#endif str r4, [r1], #0x04 bge .Lcopyin_bad2_loop4 sub r0, r0, #0x02 b .Lcopyin_l4 .Lcopyin_bad3_loop16: -#ifdef __ARMEB__ - mov r4, ip, lsl #24 -#else mov r4, ip, lsr #24 -#endif ldrt r5, [r0], #0x04 pld [r0, #0x018] ldrt r6, [r0], #0x04 ldrt r7, [r0], #0x04 ldrt ip, [r0], #0x04 -#ifdef __ARMEB__ - orr r4, r4, r5, lsr #8 - mov r5, r5, lsl #24 - orr r5, r5, r6, lsr #8 - mov r6, r6, lsl #24 - orr r6, r6, r7, lsr #8 - mov r7, r7, lsl #24 - orr r7, r7, ip, lsr #8 -#else orr r4, r4, r5, lsl #8 mov r5, r5, lsr #24 orr r5, r5, r6, lsl #8 mov r6, r6, lsr #24 orr r6, r6, r7, lsl #8 mov r7, r7, lsr #24 orr r7, r7, ip, lsl #8 -#endif str r4, [r1], #0x04 str r5, [r1], #0x04 str r6, [r1], #0x04 str r7, [r1], #0x04 .Lcopyin_bad3: subs r2, r2, #0x10 bge .Lcopyin_bad3_loop16 adds r2, r2, #0x10 ldmfdeq sp!, {r4-r7} RETeq /* Return now if done */ subs r2, r2, #0x04 sublt r0, r0, #0x01 blt .Lcopyin_l4 .Lcopyin_bad3_loop4: -#ifdef __ARMEB__ - mov r4, ip, lsl #24 -#else mov r4, ip, lsr #24 -#endif ldrt ip, [r0], #0x04 subs r2, r2, #0x04 -#ifdef __ARMEB__ - orr r4, r4, ip, lsr #8 -#else orr r4, r4, ip, lsl #8 -#endif str r4, [r1], #0x04 bge .Lcopyin_bad3_loop4 sub r0, r0, #0x01 .Lcopyin_l4: ldmfd sp!, {r4-r7} mov r3, #0x00 adds r2, r2, #0x04 RETeq .Lcopyin_l4_2: rsbs r2, r2, #0x03 addne pc, pc, r2, lsl #3 nop ldrbt ip, [r0], #0x01 strb ip, [r1], #0x01 ldrbt ip, [r0], #0x01 strb ip, [r1], #0x01 ldrbt ip, [r0] strb ip, [r1] RET END(copyin) /* * r0 = kernel space address * r1 = user space address * r2 = length * * Copies bytes from kernel space to user space */ ENTRY(copyout) cmp r2, #0x00 movle r0, #0x00 movle pc, lr /* Bail early if length is <= 0 */ adds r3, r1, r2 movcs r0, #EFAULT RETc(cs) ldr r12, =(VM_MAXUSER_ADDRESS + 1) cmp r3, r12 movcs r0, #EFAULT RETc(cs) ldr r3, .L_arm_memcpy ldr r3, [r3] cmp r3, #0 beq .Lnormale ldr r3, .L_min_memcpy_size ldr r3, [r3] cmp r2, r3 blt .Lnormale stmfd sp!, {r0-r2, r4, lr} mov r3, r0 mov r0, r1 mov r1, r3 mov r3, #1 /* DST_IS_USER */ ldr r4, .L_arm_memcpy mov lr, pc ldr pc, [r4] cmp r0, #0 ldmfd sp!, {r0-r2, r4, lr} moveq r0, #0 RETeq .Lnormale: stmfd sp!, {r10-r11, lr} GET_PCB(r10) ldr r10, [r10] mov r3, #0x00 adr ip, .Lcopyout_fault ldr r11, [r10, #PCB_ONFAULT] str ip, [r10, #PCB_ONFAULT] bl .Lcopyout_guts str r11, [r10, #PCB_ONFAULT] mov r0, #0x00 ldmfd sp!, {r10-r11, pc} .Lcopyout_fault: ldr r0, =EFAULT str r11, [r10, #PCB_ONFAULT] cmp r3, #0x00 ldmfdgt sp!, {r4-r7} /* r3 > 0 Restore r4-r7 */ ldmfdlt sp!, {r4-r9} /* r3 < 0 Restore r4-r9 */ ldmfd sp!, {r10-r11, pc} .Lcopyout_guts: pld [r0] /* Word-align the destination buffer */ ands ip, r1, #0x03 /* Already word aligned? */ beq .Lcopyout_wordaligned /* Yup */ rsb ip, ip, #0x04 cmp r2, ip /* Enough bytes left to align it? */ blt .Lcopyout_l4_2 /* Nope. Just copy bytewise */ sub r2, r2, ip rsbs ip, ip, #0x03 addne pc, pc, ip, lsl #3 nop ldrb ip, [r0], #0x01 strbt ip, [r1], #0x01 ldrb ip, [r0], #0x01 strbt ip, [r1], #0x01 ldrb ip, [r0], #0x01 strbt ip, [r1], #0x01 cmp r2, #0x00 /* All done? */ RETeq /* Destination buffer is now word aligned */ .Lcopyout_wordaligned: ands ip, r0, #0x03 /* Is src also word-aligned? */ bne .Lcopyout_bad_align /* Nope. Things just got bad */ cmp r2, #0x08 /* Less than 8 bytes remaining? */ blt .Lcopyout_w_less_than8 /* Quad-align the destination buffer */ tst r0, #0x07 /* Already quad aligned? */ ldrne ip, [r0], #0x04 subne r2, r2, #0x04 strtne ip, [r1], #0x04 stmfd sp!, {r4-r9} /* Free up some registers */ mov r3, #-1 /* Signal restore r4-r9 */ /* Destination buffer word aligned, source is quad aligned */ subs r2, r2, #0x80 blt .Lcopyout_w_lessthan128 /* Copy 128 bytes at a time */ .Lcopyout_w_loop128: ldrd r4, [r0], #0x08 /* LD:00-07 */ pld [r0, #0x18] /* Prefetch 0x20 */ ldrd r6, [r0], #0x08 /* LD:08-0f */ ldrd r8, [r0], #0x08 /* LD:10-17 */ strt r4, [r1], #0x04 /* ST:00-03 */ strt r5, [r1], #0x04 /* ST:04-07 */ ldrd r4, [r0], #0x08 /* LD:18-1f */ strt r6, [r1], #0x04 /* ST:08-0b */ strt r7, [r1], #0x04 /* ST:0c-0f */ ldrd r6, [r0], #0x08 /* LD:20-27 */ pld [r0, #0x18] /* Prefetch 0x40 */ strt r8, [r1], #0x04 /* ST:10-13 */ strt r9, [r1], #0x04 /* ST:14-17 */ ldrd r8, [r0], #0x08 /* LD:28-2f */ strt r4, [r1], #0x04 /* ST:18-1b */ strt r5, [r1], #0x04 /* ST:1c-1f */ ldrd r4, [r0], #0x08 /* LD:30-37 */ strt r6, [r1], #0x04 /* ST:20-23 */ strt r7, [r1], #0x04 /* ST:24-27 */ ldrd r6, [r0], #0x08 /* LD:38-3f */ strt r8, [r1], #0x04 /* ST:28-2b */ strt r9, [r1], #0x04 /* ST:2c-2f */ ldrd r8, [r0], #0x08 /* LD:40-47 */ pld [r0, #0x18] /* Prefetch 0x60 */ strt r4, [r1], #0x04 /* ST:30-33 */ strt r5, [r1], #0x04 /* ST:34-37 */ ldrd r4, [r0], #0x08 /* LD:48-4f */ strt r6, [r1], #0x04 /* ST:38-3b */ strt r7, [r1], #0x04 /* ST:3c-3f */ ldrd r6, [r0], #0x08 /* LD:50-57 */ strt r8, [r1], #0x04 /* ST:40-43 */ strt r9, [r1], #0x04 /* ST:44-47 */ ldrd r8, [r0], #0x08 /* LD:58-4f */ strt r4, [r1], #0x04 /* ST:48-4b */ strt r5, [r1], #0x04 /* ST:4c-4f */ ldrd r4, [r0], #0x08 /* LD:60-67 */ pld [r0, #0x18] /* Prefetch 0x80 */ strt r6, [r1], #0x04 /* ST:50-53 */ strt r7, [r1], #0x04 /* ST:54-57 */ ldrd r6, [r0], #0x08 /* LD:68-6f */ strt r8, [r1], #0x04 /* ST:58-5b */ strt r9, [r1], #0x04 /* ST:5c-5f */ ldrd r8, [r0], #0x08 /* LD:70-77 */ strt r4, [r1], #0x04 /* ST:60-63 */ strt r5, [r1], #0x04 /* ST:64-67 */ ldrd r4, [r0], #0x08 /* LD:78-7f */ strt r6, [r1], #0x04 /* ST:68-6b */ strt r7, [r1], #0x04 /* ST:6c-6f */ strt r8, [r1], #0x04 /* ST:70-73 */ strt r9, [r1], #0x04 /* ST:74-77 */ subs r2, r2, #0x80 strt r4, [r1], #0x04 /* ST:78-7b */ strt r5, [r1], #0x04 /* ST:7c-7f */ bge .Lcopyout_w_loop128 .Lcopyout_w_lessthan128: adds r2, r2, #0x80 /* Adjust for extra sub */ ldmfdeq sp!, {r4-r9} RETeq /* Return now if done */ subs r2, r2, #0x20 blt .Lcopyout_w_lessthan32 /* Copy 32 bytes at a time */ .Lcopyout_w_loop32: ldrd r4, [r0], #0x08 pld [r0, #0x18] ldrd r6, [r0], #0x08 ldrd r8, [r0], #0x08 strt r4, [r1], #0x04 strt r5, [r1], #0x04 ldrd r4, [r0], #0x08 strt r6, [r1], #0x04 strt r7, [r1], #0x04 strt r8, [r1], #0x04 strt r9, [r1], #0x04 subs r2, r2, #0x20 strt r4, [r1], #0x04 strt r5, [r1], #0x04 bge .Lcopyout_w_loop32 .Lcopyout_w_lessthan32: adds r2, r2, #0x20 /* Adjust for extra sub */ ldmfdeq sp!, {r4-r9} RETeq /* Return now if done */ and r4, r2, #0x18 rsb r5, r4, #0x18 subs r2, r2, r4 add pc, pc, r5, lsl #1 nop /* At least 24 bytes remaining */ ldrd r4, [r0], #0x08 strt r4, [r1], #0x04 strt r5, [r1], #0x04 nop /* At least 16 bytes remaining */ ldrd r4, [r0], #0x08 strt r4, [r1], #0x04 strt r5, [r1], #0x04 nop /* At least 8 bytes remaining */ ldrd r4, [r0], #0x08 strt r4, [r1], #0x04 strt r5, [r1], #0x04 nop /* Less than 8 bytes remaining */ ldmfd sp!, {r4-r9} RETeq /* Return now if done */ mov r3, #0x00 .Lcopyout_w_less_than8: subs r2, r2, #0x04 ldrge ip, [r0], #0x04 strtge ip, [r1], #0x04 RETeq /* Return now if done */ addlt r2, r2, #0x04 ldrb ip, [r0], #0x01 cmp r2, #0x02 ldrbge r2, [r0], #0x01 strbt ip, [r1], #0x01 ldrbgt ip, [r0] strbtge r2, [r1], #0x01 strbtgt ip, [r1] RET /* * At this point, it has not been possible to word align both buffers. * The destination buffer (r1) is word aligned, but the source buffer * (r0) is not. */ .Lcopyout_bad_align: stmfd sp!, {r4-r7} mov r3, #0x01 bic r0, r0, #0x03 cmp ip, #2 ldr ip, [r0], #0x04 bgt .Lcopyout_bad3 beq .Lcopyout_bad2 b .Lcopyout_bad1 .Lcopyout_bad1_loop16: -#ifdef __ARMEB__ - mov r4, ip, lsl #8 -#else mov r4, ip, lsr #8 -#endif ldr r5, [r0], #0x04 pld [r0, #0x018] ldr r6, [r0], #0x04 ldr r7, [r0], #0x04 ldr ip, [r0], #0x04 -#ifdef __ARMEB__ - orr r4, r4, r5, lsr #24 - mov r5, r5, lsl #8 - orr r5, r5, r6, lsr #24 - mov r6, r6, lsl #8 - orr r6, r6, r7, lsr #24 - mov r7, r7, lsl #8 - orr r7, r7, ip, lsr #24 -#else orr r4, r4, r5, lsl #24 mov r5, r5, lsr #8 orr r5, r5, r6, lsl #24 mov r6, r6, lsr #8 orr r6, r6, r7, lsl #24 mov r7, r7, lsr #8 orr r7, r7, ip, lsl #24 -#endif strt r4, [r1], #0x04 strt r5, [r1], #0x04 strt r6, [r1], #0x04 strt r7, [r1], #0x04 .Lcopyout_bad1: subs r2, r2, #0x10 bge .Lcopyout_bad1_loop16 adds r2, r2, #0x10 ldmfdeq sp!, {r4-r7} RETeq /* Return now if done */ subs r2, r2, #0x04 sublt r0, r0, #0x03 blt .Lcopyout_l4 .Lcopyout_bad1_loop4: -#ifdef __ARMEB__ - mov r4, ip, lsl #8 -#else mov r4, ip, lsr #8 -#endif ldr ip, [r0], #0x04 subs r2, r2, #0x04 -#ifdef __ARMEB__ - orr r4, r4, ip, lsr #24 -#else orr r4, r4, ip, lsl #24 -#endif strt r4, [r1], #0x04 bge .Lcopyout_bad1_loop4 sub r0, r0, #0x03 b .Lcopyout_l4 .Lcopyout_bad2_loop16: -#ifdef __ARMEB__ - mov r4, ip, lsl #16 -#else mov r4, ip, lsr #16 -#endif ldr r5, [r0], #0x04 pld [r0, #0x018] ldr r6, [r0], #0x04 ldr r7, [r0], #0x04 ldr ip, [r0], #0x04 -#ifdef __ARMEB__ - orr r4, r4, r5, lsr #16 - mov r5, r5, lsl #16 - orr r5, r5, r6, lsr #16 - mov r6, r6, lsl #16 - orr r6, r6, r7, lsr #16 - mov r7, r7, lsl #16 - orr r7, r7, ip, lsr #16 -#else orr r4, r4, r5, lsl #16 mov r5, r5, lsr #16 orr r5, r5, r6, lsl #16 mov r6, r6, lsr #16 orr r6, r6, r7, lsl #16 mov r7, r7, lsr #16 orr r7, r7, ip, lsl #16 -#endif strt r4, [r1], #0x04 strt r5, [r1], #0x04 strt r6, [r1], #0x04 strt r7, [r1], #0x04 .Lcopyout_bad2: subs r2, r2, #0x10 bge .Lcopyout_bad2_loop16 adds r2, r2, #0x10 ldmfdeq sp!, {r4-r7} RETeq /* Return now if done */ subs r2, r2, #0x04 sublt r0, r0, #0x02 blt .Lcopyout_l4 .Lcopyout_bad2_loop4: -#ifdef __ARMEB__ - mov r4, ip, lsl #16 -#else mov r4, ip, lsr #16 -#endif ldr ip, [r0], #0x04 subs r2, r2, #0x04 -#ifdef __ARMEB__ - orr r4, r4, ip, lsr #16 -#else orr r4, r4, ip, lsl #16 -#endif strt r4, [r1], #0x04 bge .Lcopyout_bad2_loop4 sub r0, r0, #0x02 b .Lcopyout_l4 .Lcopyout_bad3_loop16: -#ifdef __ARMEB__ - mov r4, ip, lsl #24 -#else mov r4, ip, lsr #24 -#endif ldr r5, [r0], #0x04 pld [r0, #0x018] ldr r6, [r0], #0x04 ldr r7, [r0], #0x04 ldr ip, [r0], #0x04 -#ifdef __ARMEB__ - orr r4, r4, r5, lsr #8 - mov r5, r5, lsl #24 - orr r5, r5, r6, lsr #8 - mov r6, r6, lsl #24 - orr r6, r6, r7, lsr #8 - mov r7, r7, lsl #24 - orr r7, r7, ip, lsr #8 -#else orr r4, r4, r5, lsl #8 mov r5, r5, lsr #24 orr r5, r5, r6, lsl #8 mov r6, r6, lsr #24 orr r6, r6, r7, lsl #8 mov r7, r7, lsr #24 orr r7, r7, ip, lsl #8 -#endif strt r4, [r1], #0x04 strt r5, [r1], #0x04 strt r6, [r1], #0x04 strt r7, [r1], #0x04 .Lcopyout_bad3: subs r2, r2, #0x10 bge .Lcopyout_bad3_loop16 adds r2, r2, #0x10 ldmfdeq sp!, {r4-r7} RETeq /* Return now if done */ subs r2, r2, #0x04 sublt r0, r0, #0x01 blt .Lcopyout_l4 .Lcopyout_bad3_loop4: -#ifdef __ARMEB__ - mov r4, ip, lsl #24 -#else mov r4, ip, lsr #24 -#endif ldr ip, [r0], #0x04 subs r2, r2, #0x04 -#ifdef __ARMEB__ - orr r4, r4, ip, lsr #8 -#else orr r4, r4, ip, lsl #8 -#endif strt r4, [r1], #0x04 bge .Lcopyout_bad3_loop4 sub r0, r0, #0x01 .Lcopyout_l4: ldmfd sp!, {r4-r7} mov r3, #0x00 adds r2, r2, #0x04 RETeq .Lcopyout_l4_2: rsbs r2, r2, #0x03 addne pc, pc, r2, lsl #3 nop ldrb ip, [r0], #0x01 strbt ip, [r1], #0x01 ldrb ip, [r0], #0x01 strbt ip, [r1], #0x01 ldrb ip, [r0] strbt ip, [r1] RET END(copyout) Index: head/sys/arm/arm/cpufunc.c =================================================================== --- head/sys/arm/arm/cpufunc.c (revision 368152) +++ head/sys/arm/arm/cpufunc.c (revision 368153) @@ -1,589 +1,586 @@ /* $NetBSD: cpufunc.c,v 1.65 2003/11/05 12:53:15 scw Exp $ */ /*- * SPDX-License-Identifier: BSD-4-Clause * * arm9 support code Copyright (C) 2001 ARM Ltd * Copyright (c) 1997 Mark Brinicombe. * Copyright (c) 1997 Causality Limited * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Causality Limited. * 4. The name of Causality Limited may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY CAUSALITY LIMITED ``AS IS'' AND ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL CAUSALITY LIMITED BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * RiscBSD kernel project * * cpufuncs.c * * C functions for supporting CPU / MMU / TLB specific operations. * * Created : 30/01/97 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include /* PRIMARY CACHE VARIABLES */ int arm_picache_size; int arm_picache_line_size; int arm_picache_ways; int arm_pdcache_size; /* and unified */ int arm_pdcache_line_size; int arm_pdcache_ways; int arm_pcache_type; int arm_pcache_unified; int arm_dcache_align; int arm_dcache_align_mask; u_int arm_cache_level; u_int arm_cache_type[14]; u_int arm_cache_loc; #if defined(CPU_ARM9E) static void arm10_setup(void); #endif #ifdef CPU_MV_PJ4B static void pj4bv7_setup(void); #endif #if defined(CPU_ARM1176) static void arm11x6_setup(void); #endif #if defined(CPU_CORTEXA) || defined(CPU_KRAIT) static void cortexa_setup(void); #endif #if defined(CPU_ARM9E) struct cpu_functions armv5_ec_cpufuncs = { /* CPU functions */ cpufunc_nullop, /* cpwait */ /* MMU functions */ cpufunc_control, /* control */ armv5_ec_setttb, /* Setttb */ /* TLB functions */ armv4_tlb_flushID, /* tlb_flushID */ arm9_tlb_flushID_SE, /* tlb_flushID_SE */ armv4_tlb_flushD, /* tlb_flushD */ armv4_tlb_flushD_SE, /* tlb_flushD_SE */ /* Cache operations */ armv5_ec_icache_sync_range, /* icache_sync_range */ armv5_ec_dcache_wbinv_all, /* dcache_wbinv_all */ armv5_ec_dcache_wbinv_range, /* dcache_wbinv_range */ armv5_ec_dcache_inv_range, /* dcache_inv_range */ armv5_ec_dcache_wb_range, /* dcache_wb_range */ armv4_idcache_inv_all, /* idcache_inv_all */ armv5_ec_idcache_wbinv_all, /* idcache_wbinv_all */ armv5_ec_idcache_wbinv_range, /* idcache_wbinv_range */ cpufunc_nullop, /* l2cache_wbinv_all */ (void *)cpufunc_nullop, /* l2cache_wbinv_range */ (void *)cpufunc_nullop, /* l2cache_inv_range */ (void *)cpufunc_nullop, /* l2cache_wb_range */ (void *)cpufunc_nullop, /* l2cache_drain_writebuf */ /* Other functions */ armv4_drain_writebuf, /* drain_writebuf */ (void *)cpufunc_nullop, /* sleep */ /* Soft functions */ arm9_context_switch, /* context_switch */ arm10_setup /* cpu setup */ }; struct cpu_functions sheeva_cpufuncs = { /* CPU functions */ cpufunc_nullop, /* cpwait */ /* MMU functions */ cpufunc_control, /* control */ sheeva_setttb, /* Setttb */ /* TLB functions */ armv4_tlb_flushID, /* tlb_flushID */ arm9_tlb_flushID_SE, /* tlb_flushID_SE */ armv4_tlb_flushD, /* tlb_flushD */ armv4_tlb_flushD_SE, /* tlb_flushD_SE */ /* Cache operations */ armv5_ec_icache_sync_range, /* icache_sync_range */ armv5_ec_dcache_wbinv_all, /* dcache_wbinv_all */ sheeva_dcache_wbinv_range, /* dcache_wbinv_range */ sheeva_dcache_inv_range, /* dcache_inv_range */ sheeva_dcache_wb_range, /* dcache_wb_range */ armv4_idcache_inv_all, /* idcache_inv_all */ armv5_ec_idcache_wbinv_all, /* idcache_wbinv_all */ sheeva_idcache_wbinv_range, /* idcache_wbinv_all */ sheeva_l2cache_wbinv_all, /* l2cache_wbinv_all */ sheeva_l2cache_wbinv_range, /* l2cache_wbinv_range */ sheeva_l2cache_inv_range, /* l2cache_inv_range */ sheeva_l2cache_wb_range, /* l2cache_wb_range */ (void *)cpufunc_nullop, /* l2cache_drain_writebuf */ /* Other functions */ armv4_drain_writebuf, /* drain_writebuf */ sheeva_cpu_sleep, /* sleep */ /* Soft functions */ arm9_context_switch, /* context_switch */ arm10_setup /* cpu setup */ }; #endif /* CPU_ARM9E */ #ifdef CPU_MV_PJ4B struct cpu_functions pj4bv7_cpufuncs = { /* Cache operations */ .cf_l2cache_wbinv_all = (void *)cpufunc_nullop, .cf_l2cache_wbinv_range = (void *)cpufunc_nullop, .cf_l2cache_inv_range = (void *)cpufunc_nullop, .cf_l2cache_wb_range = (void *)cpufunc_nullop, .cf_l2cache_drain_writebuf = (void *)cpufunc_nullop, /* Other functions */ .cf_sleep = (void *)cpufunc_nullop, /* Soft functions */ .cf_setup = pj4bv7_setup }; #endif /* CPU_MV_PJ4B */ #if defined(CPU_ARM1176) struct cpu_functions arm1176_cpufuncs = { /* Cache operations */ .cf_l2cache_wbinv_all = (void *)cpufunc_nullop, .cf_l2cache_wbinv_range = (void *)cpufunc_nullop, .cf_l2cache_inv_range = (void *)cpufunc_nullop, .cf_l2cache_wb_range = (void *)cpufunc_nullop, .cf_l2cache_drain_writebuf = (void *)cpufunc_nullop, /* Other functions */ .cf_sleep = arm11x6_sleep, /* Soft functions */ .cf_setup = arm11x6_setup }; #endif /*CPU_ARM1176 */ #if defined(CPU_CORTEXA) || defined(CPU_KRAIT) struct cpu_functions cortexa_cpufuncs = { /* Cache operations */ /* * Note: For CPUs using the PL310 the L2 ops are filled in when the * L2 cache controller is actually enabled. */ .cf_l2cache_wbinv_all = cpufunc_nullop, .cf_l2cache_wbinv_range = (void *)cpufunc_nullop, .cf_l2cache_inv_range = (void *)cpufunc_nullop, .cf_l2cache_wb_range = (void *)cpufunc_nullop, .cf_l2cache_drain_writebuf = (void *)cpufunc_nullop, /* Other functions */ .cf_sleep = armv7_cpu_sleep, /* Soft functions */ .cf_setup = cortexa_setup }; #endif /* CPU_CORTEXA || CPU_KRAIT */ /* * Global constants also used by locore.s */ struct cpu_functions cpufuncs; u_int cputype; #if defined (CPU_ARM9E) || \ defined(CPU_ARM1176) || \ defined(CPU_MV_PJ4B) || \ defined(CPU_CORTEXA) || defined(CPU_KRAIT) static void get_cachetype_cp15(void); /* Additional cache information local to this file. Log2 of some of the above numbers. */ static int arm_dcache_l2_nsets; static int arm_dcache_l2_assoc; static int arm_dcache_l2_linesize; static void get_cachetype_cp15(void) { u_int ctype, isize, dsize, cpuid; u_int clevel, csize, i, sel; u_int multiplier; u_char type; ctype = cp15_ctr_get(); cpuid = cp15_midr_get(); /* * ...and thus spake the ARM ARM: * * If an value corresponding to an unimplemented or * reserved ID register is encountered, the System Control * processor returns the value of the main ID register. */ if (ctype == cpuid) goto out; if (CPU_CT_FORMAT(ctype) == CPU_CT_ARMV7) { __asm __volatile("mrc p15, 1, %0, c0, c0, 1" : "=r" (clevel)); arm_cache_level = clevel; arm_cache_loc = CPU_CLIDR_LOC(arm_cache_level); i = 0; while ((type = (clevel & 0x7)) && i < 7) { if (type == CACHE_DCACHE || type == CACHE_UNI_CACHE || type == CACHE_SEP_CACHE) { sel = i << 1; __asm __volatile("mcr p15, 2, %0, c0, c0, 0" : : "r" (sel)); __asm __volatile("mrc p15, 1, %0, c0, c0, 0" : "=r" (csize)); arm_cache_type[sel] = csize; arm_dcache_align = 1 << (CPUV7_CT_xSIZE_LEN(csize) + 4); arm_dcache_align_mask = arm_dcache_align - 1; } if (type == CACHE_ICACHE || type == CACHE_SEP_CACHE) { sel = (i << 1) | 1; __asm __volatile("mcr p15, 2, %0, c0, c0, 0" : : "r" (sel)); __asm __volatile("mrc p15, 1, %0, c0, c0, 0" : "=r" (csize)); arm_cache_type[sel] = csize; } i++; clevel >>= 3; } } else { if ((ctype & CPU_CT_S) == 0) arm_pcache_unified = 1; /* * If you want to know how this code works, go read the ARM ARM. */ arm_pcache_type = CPU_CT_CTYPE(ctype); if (arm_pcache_unified == 0) { isize = CPU_CT_ISIZE(ctype); multiplier = (isize & CPU_CT_xSIZE_M) ? 3 : 2; arm_picache_line_size = 1U << (CPU_CT_xSIZE_LEN(isize) + 3); if (CPU_CT_xSIZE_ASSOC(isize) == 0) { if (isize & CPU_CT_xSIZE_M) arm_picache_line_size = 0; /* not present */ else arm_picache_ways = 1; } else { arm_picache_ways = multiplier << (CPU_CT_xSIZE_ASSOC(isize) - 1); } arm_picache_size = multiplier << (CPU_CT_xSIZE_SIZE(isize) + 8); } dsize = CPU_CT_DSIZE(ctype); multiplier = (dsize & CPU_CT_xSIZE_M) ? 3 : 2; arm_pdcache_line_size = 1U << (CPU_CT_xSIZE_LEN(dsize) + 3); if (CPU_CT_xSIZE_ASSOC(dsize) == 0) { if (dsize & CPU_CT_xSIZE_M) arm_pdcache_line_size = 0; /* not present */ else arm_pdcache_ways = 1; } else { arm_pdcache_ways = multiplier << (CPU_CT_xSIZE_ASSOC(dsize) - 1); } arm_pdcache_size = multiplier << (CPU_CT_xSIZE_SIZE(dsize) + 8); arm_dcache_align = arm_pdcache_line_size; arm_dcache_l2_assoc = CPU_CT_xSIZE_ASSOC(dsize) + multiplier - 2; arm_dcache_l2_linesize = CPU_CT_xSIZE_LEN(dsize) + 3; arm_dcache_l2_nsets = 6 + CPU_CT_xSIZE_SIZE(dsize) - CPU_CT_xSIZE_ASSOC(dsize) - CPU_CT_xSIZE_LEN(dsize); out: arm_dcache_align_mask = arm_dcache_align - 1; } } #endif /* ARM9 || XSCALE */ /* * Cannot panic here as we may not have a console yet ... */ int set_cpufuncs(void) { cputype = cp15_midr_get(); cputype &= CPU_ID_CPU_MASK; #if defined(CPU_ARM9E) if (cputype == CPU_ID_MV88FR131 || cputype == CPU_ID_MV88FR571_VD || cputype == CPU_ID_MV88FR571_41) { uint32_t sheeva_ctrl; sheeva_ctrl = (MV_DC_STREAM_ENABLE | MV_BTB_DISABLE | MV_L2_ENABLE); /* * Workaround for Marvell MV78100 CPU: Cache prefetch * mechanism may affect the cache coherency validity, * so it needs to be disabled. * * Refer to errata document MV-S501058-00C.pdf (p. 3.1 * L2 Prefetching Mechanism) for details. */ if (cputype == CPU_ID_MV88FR571_VD || cputype == CPU_ID_MV88FR571_41) sheeva_ctrl |= MV_L2_PREFETCH_DISABLE; sheeva_control_ext(0xffffffff & ~MV_WA_ENABLE, sheeva_ctrl); cpufuncs = sheeva_cpufuncs; get_cachetype_cp15(); pmap_pte_init_generic(); goto out; } else if (cputype == CPU_ID_ARM926EJS) { cpufuncs = armv5_ec_cpufuncs; get_cachetype_cp15(); pmap_pte_init_generic(); goto out; } #endif /* CPU_ARM9E */ #if defined(CPU_ARM1176) if (cputype == CPU_ID_ARM1176JZS) { cpufuncs = arm1176_cpufuncs; get_cachetype_cp15(); goto out; } #endif /* CPU_ARM1176 */ #if defined(CPU_CORTEXA) || defined(CPU_KRAIT) switch(cputype & CPU_ID_SCHEME_MASK) { case CPU_ID_CORTEXA5: case CPU_ID_CORTEXA7: case CPU_ID_CORTEXA8: case CPU_ID_CORTEXA9: case CPU_ID_CORTEXA12: case CPU_ID_CORTEXA15: case CPU_ID_CORTEXA53: case CPU_ID_CORTEXA57: case CPU_ID_CORTEXA72: case CPU_ID_KRAIT300: cpufuncs = cortexa_cpufuncs; get_cachetype_cp15(); goto out; default: break; } #endif /* CPU_CORTEXA || CPU_KRAIT */ #if defined(CPU_MV_PJ4B) if (cputype == CPU_ID_MV88SV581X_V7 || cputype == CPU_ID_MV88SV584X_V7 || cputype == CPU_ID_ARM_88SV581X_V7) { cpufuncs = pj4bv7_cpufuncs; get_cachetype_cp15(); goto out; } #endif /* CPU_MV_PJ4B */ /* * Bzzzz. And the answer was ... */ panic("No support for this CPU type (%08x) in kernel", cputype); return(ARCHITECTURE_NOT_PRESENT); out: uma_set_align(arm_dcache_align_mask); return (0); } /* * CPU Setup code */ #if defined(CPU_ARM9E) static void arm10_setup(void) { int cpuctrl, cpuctrlmask; cpuctrl = CPU_CONTROL_MMU_ENABLE | CPU_CONTROL_SYST_ENABLE | CPU_CONTROL_IC_ENABLE | CPU_CONTROL_DC_ENABLE | CPU_CONTROL_WBUF_ENABLE | CPU_CONTROL_BPRD_ENABLE; cpuctrlmask = CPU_CONTROL_MMU_ENABLE | CPU_CONTROL_SYST_ENABLE | CPU_CONTROL_IC_ENABLE | CPU_CONTROL_DC_ENABLE | CPU_CONTROL_WBUF_ENABLE | CPU_CONTROL_ROM_ENABLE | CPU_CONTROL_BEND_ENABLE | CPU_CONTROL_AFLT_ENABLE | CPU_CONTROL_BPRD_ENABLE | CPU_CONTROL_ROUNDROBIN | CPU_CONTROL_CPCLK; #ifndef ARM32_DISABLE_ALIGNMENT_FAULTS cpuctrl |= CPU_CONTROL_AFLT_ENABLE; #endif -#ifdef __ARMEB__ - cpuctrl |= CPU_CONTROL_BEND_ENABLE; -#endif /* Clear out the cache */ cpu_idcache_wbinv_all(); /* Now really make sure they are clean. */ __asm __volatile ("mcr\tp15, 0, r0, c7, c7, 0" : : ); if (vector_page == ARM_VECTORS_HIGH) cpuctrl |= CPU_CONTROL_VECRELOC; /* Set the control register */ cpu_control(0xffffffff, cpuctrl); /* And again. */ cpu_idcache_wbinv_all(); } #endif /* CPU_ARM9E || CPU_ARM10 */ #if defined(CPU_ARM1176) \ || defined(CPU_MV_PJ4B) \ || defined(CPU_CORTEXA) || defined(CPU_KRAIT) static __inline void cpu_scc_setup_ccnt(void) { /* This is how you give userland access to the CCNT and PMCn * registers. * BEWARE! This gives write access also, which may not be what * you want! */ #ifdef _PMC_USER_READ_WRITE_ /* Set PMUSERENR[0] to allow userland access */ cp15_pmuserenr_set(1); #endif #if defined(CPU_ARM1176) /* Set PMCR[2,0] to enable counters and reset CCNT */ cp15_pmcr_set(5); #else /* Set up the PMCCNTR register as a cyclecounter: * Set PMINTENCLR to 0xFFFFFFFF to block interrupts * Set PMCR[2,0] to enable counters and reset CCNT * Set PMCNTENSET to 0x80000000 to enable CCNT */ cp15_pminten_clr(0xFFFFFFFF); cp15_pmcr_set(5); cp15_pmcnten_set(0x80000000); #endif } #endif #if defined(CPU_ARM1176) static void arm11x6_setup(void) { uint32_t auxctrl, auxctrl_wax; uint32_t tmp, tmp2; uint32_t cpuid; cpuid = cp15_midr_get(); auxctrl = 0; auxctrl_wax = ~0; /* * Enable an errata workaround */ if ((cpuid & CPU_ID_CPU_MASK) == CPU_ID_ARM1176JZS) { /* ARM1176JZSr0 */ auxctrl = ARM1176_AUXCTL_PHD; auxctrl_wax = ~ARM1176_AUXCTL_PHD; } tmp = cp15_actlr_get(); tmp2 = tmp; tmp &= auxctrl_wax; tmp |= auxctrl; if (tmp != tmp2) cp15_actlr_set(tmp); cpu_scc_setup_ccnt(); } #endif /* CPU_ARM1176 */ #ifdef CPU_MV_PJ4B static void pj4bv7_setup(void) { pj4b_config(); cpu_scc_setup_ccnt(); } #endif /* CPU_MV_PJ4B */ #if defined(CPU_CORTEXA) || defined(CPU_KRAIT) static void cortexa_setup(void) { cpu_scc_setup_ccnt(); } #endif /* CPU_CORTEXA || CPU_KRAIT */ Index: head/sys/arm/arm/fusu.S =================================================================== --- head/sys/arm/arm/fusu.S (revision 368152) +++ head/sys/arm/arm/fusu.S (revision 368153) @@ -1,313 +1,304 @@ /* $NetBSD: fusu.S,v 1.10 2003/12/01 13:34:44 rearnsha Exp $ */ /*- * Copyright (c) 1996-1998 Mark Brinicombe. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Mark Brinicombe * 4. The name of the company nor the name of the author may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include #include #include "assym.inc" __FBSDID("$FreeBSD$"); .syntax unified #define GET_PCB(tmp) \ mrc p15, 0, tmp, c13, c0, 4; \ add tmp, tmp, #(TD_PCB) /* * casueword32(volatile uint32_t *base, uint32_t oldval, uint32_t *oldvalp, * uint32_t newval); */ ENTRY(casueword) EENTRY_NP(casueword32) stmfd sp!, {r4, r5, r6} ldr r4, =(VM_MAXUSER_ADDRESS-3) cmp r0, r4 mvncs r0, #0 bcs 1f GET_PCB(r6) ldr r6, [r6] #ifdef DIAGNOSTIC teq r6, #0x00000000 ldmfdeq sp!, {r4, r5, r6} beq .Lfusupcbfault #endif adr r4, .Lcasuwordfault str r4, [r6, #PCB_ONFAULT] mov r5, #1 ldrex r4, [r0] cmp r4, r1 strexeq r5, r3, [r0] str r4, [r2] mov r0, #0 str r0, [r6, #PCB_ONFAULT] mov r0, r5 1: ldmfd sp!, {r4, r5, r6} RET EEND(casueword32) END(casueword) /* * Handle faults from casuword. Clean up and return -1. */ .Lcasuwordfault: mov r0, #0x00000000 str r0, [r6, #PCB_ONFAULT] mvn r0, #0 ldmfd sp!, {r4, r5, r6} RET /* * fueword(caddr_t uaddr, long *val); * Fetch an int from the user's address space. */ ENTRY(fueword) EENTRY_NP(fueword32) ldr r3, =(VM_MAXUSER_ADDRESS-3) cmp r0, r3 mvncs r0, #0 RETc(cs) GET_PCB(r2) ldr r2, [r2] #ifdef DIAGNOSTIC teq r2, #0x00000000 beq .Lfusupcbfault #endif adr r3, .Lfusufault str r3, [r2, #PCB_ONFAULT] ldrt r3, [r0] str r3, [r1] mov r0, #0x00000000 str r0, [r2, #PCB_ONFAULT] RET EEND(fueword32) END(fueword) /* * fusword(caddr_t uaddr); * Fetch a short from the user's address space. */ ENTRY(fusword) ldr r3, =(VM_MAXUSER_ADDRESS-1) cmp r0, r3 mvncs r0, #0 RETc(cs) GET_PCB(r2) ldr r2, [r2] #ifdef DIAGNOSTIC teq r2, #0x00000000 beq .Lfusupcbfault #endif adr r1, .Lfusufault str r1, [r2, #PCB_ONFAULT] ldrbt r3, [r0], #1 ldrbt ip, [r0] -#ifdef __ARMEB__ - orr r0, ip, r3, asl #8 -#else orr r0, r3, ip, asl #8 -#endif mov r1, #0x00000000 str r1, [r2, #PCB_ONFAULT] RET END(fusword) /* * fubyte(caddr_t uaddr); * Fetch a byte from the user's address space. */ ENTRY(fubyte) ldr r3, =VM_MAXUSER_ADDRESS cmp r0, r3 mvncs r0, #0 RETc(cs) GET_PCB(r2) ldr r2, [r2] #ifdef DIAGNOSTIC teq r2, #0x00000000 beq .Lfusupcbfault #endif adr r1, .Lfusufault str r1, [r2, #PCB_ONFAULT] ldrbt r3, [r0] mov r1, #0x00000000 str r1, [r2, #PCB_ONFAULT] mov r0, r3 RET END(fubyte) /* * Handle faults from [fs]u*(). Clean up and return -1. */ .Lfusufault: mov r0, #0x00000000 str r0, [r2, #PCB_ONFAULT] mvn r0, #0x00000000 RET #ifdef DIAGNOSTIC /* * Handle earlier faults from [fs]u*(), due to no pcb */ .Lfusupcbfault: mov r1, r0 adr r0, fusupcbfaulttext b _C_LABEL(panic) fusupcbfaulttext: .asciz "Yikes - no valid PCB during fusuxxx() addr=%08x\n" .align 2 #endif /* * suword(caddr_t uaddr, int x); * Store an int in the user's address space. */ ENTRY(suword) EENTRY_NP(suword32) ldr r3, =(VM_MAXUSER_ADDRESS-3) cmp r0, r3 mvncs r0, #0 RETc(cs) GET_PCB(r2) ldr r2, [r2] #ifdef DIAGNOSTIC teq r2, #0x00000000 beq .Lfusupcbfault #endif adr r3, .Lfusufault str r3, [r2, #PCB_ONFAULT] strt r1, [r0] mov r0, #0x00000000 str r0, [r2, #PCB_ONFAULT] RET EEND(suword32) END(suword) /* * susword(caddr_t uaddr, short x); * Store a short in the user's address space. */ ENTRY(susword) ldr r3, =(VM_MAXUSER_ADDRESS-1) cmp r0, r3 mvncs r0, #0 RETc(cs) GET_PCB(r2) ldr r2, [r2] #ifdef DIAGNOSTIC teq r2, #0x00000000 beq .Lfusupcbfault #endif adr r3, .Lfusufault str r3, [r2, #PCB_ONFAULT] -#ifdef __ARMEB__ - mov ip, r1, lsr #8 - strbt ip, [r0], #1 -#else strbt r1, [r0], #1 mov r1, r1, lsr #8 -#endif strbt r1, [r0] mov r0, #0x00000000 str r0, [r2, #PCB_ONFAULT] RET END(susword) /* * subyte(caddr_t uaddr, char x); * Store a byte in the user's address space. */ ENTRY(subyte) ldr r3, =VM_MAXUSER_ADDRESS cmp r0, r3 mvncs r0, #0 RETc(cs) GET_PCB(r2) ldr r2, [r2] #ifdef DIAGNOSTIC teq r2, #0x00000000 beq .Lfusupcbfault #endif adr r3, .Lfusufault str r3, [r2, #PCB_ONFAULT] strbt r1, [r0] mov r0, #0x00000000 str r0, [r2, #PCB_ONFAULT] RET END(subyte) Index: head/sys/arm/arm/in_cksum_arm.S =================================================================== --- head/sys/arm/arm/in_cksum_arm.S (revision 368152) +++ head/sys/arm/arm/in_cksum_arm.S (revision 368153) @@ -1,344 +1,331 @@ /* $NetBSD: in_cksum_arm.S,v 1.2 2003/09/23 10:01:36 scw Exp $ */ /*- * Copyright 2003 Wasabi Systems, Inc. * All rights reserved. * * Written by Steve C. Woodford for Wasabi Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed for the NetBSD Project by * Wasabi Systems, Inc. * 4. The name of Wasabi Systems, Inc. may not be used to endorse * or promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * */ /* * Hand-optimised in_cksum() and in4_cksum() implementations for ARM/armv5e */ #include "opt_inet.h" #include #include "assym.inc" __FBSDID("$FreeBSD$"); .syntax unified /* * int in_cksum(struct mbuf *m, int len) * * Entry: * r0 m * r1 len * * NOTE: Assumes 'm' is *never* NULL. */ /* LINTSTUB: Func: int in_cksum(struct mbuf *, int) */ ENTRY(in_cksum) stmfd sp!, {r4-r11,lr} mov r8, #0x00 mov r9, r1 mov r10, #0x00 mov ip, r0 .Lin_cksum_loop: ldr r1, [ip, #(M_LEN)] ldr r0, [ip, #(M_DATA)] ldr ip, [ip, #(M_NEXT)] .Lin_cksum_entry4: cmp r9, r1 movlt r1, r9 sub r9, r9, r1 eor r11, r10, r0 add r10, r10, r1 adds r2, r1, #0x00 blne _ASM_LABEL(L_cksumdata) tst r11, #0x01 movne r2, r2, ror #8 adds r8, r8, r2 adc r8, r8, #0x00 cmp ip, #0x00 bne .Lin_cksum_loop mov r1, #0xff orr r1, r1, #0xff00 and r0, r8, r1 add r0, r0, r8, lsr #16 add r0, r0, r0, lsr #16 and r0, r0, r1 eor r0, r0, r1 ldmfd sp!, {r4-r11,pc} END(in_cksum) ENTRY(do_cksum) stmfd sp!, {r4-r7, lr} bl L_cksumdata mov r0, r2 ldmfd sp!, {r4-r7, pc} END(do_cksum) /* * The main in*_cksum() workhorse... * * Entry parameters: * r0 Pointer to buffer * r1 Buffer length * lr Return address * * Returns: * r2 Accumulated 32-bit sum * * Clobbers: * r0-r7 */ /* LINTSTUB: Ignore */ ASENTRY_NP(L_cksumdata) #ifdef _ARM_ARCH_5E pld [r0] /* Pre-fetch the start of the buffer */ #endif mov r2, #0 /* We first have to word-align the buffer. */ ands r7, r0, #0x03 beq .Lcksumdata_wordaligned rsb r7, r7, #0x04 cmp r1, r7 /* Enough bytes left to make it? */ blt .Lcksumdata_endgame cmp r7, #0x02 ldrb r4, [r0], #0x01 /* Fetch 1st byte */ ldrbge r5, [r0], #0x01 /* Fetch 2nd byte */ movlt r5, #0x00 ldrbgt r6, [r0], #0x01 /* Fetch 3rd byte */ movle r6, #0x00 + /* Combine the three bytes depending on endianness and alignment */ -#ifdef __ARMEB__ - orreq r2, r5, r4, lsl #8 - orreq r2, r2, r6, lsl #24 - orrne r2, r4, r5, lsl #8 - orrne r2, r2, r6, lsl #16 -#else orreq r2, r4, r5, lsl #8 orreq r2, r2, r6, lsl #16 orrne r2, r5, r4, lsl #8 orrne r2, r2, r6, lsl #24 -#endif subs r1, r1, r7 /* Update length */ - RETeq /* All done? */ + RETeq /* All done? */ /* Buffer is now word aligned */ .Lcksumdata_wordaligned: #ifdef _ARM_ARCH_5E cmp r1, #0x04 /* Less than 4 bytes left? */ blt .Lcksumdata_endgame /* Yup */ /* Now quad-align, if necessary */ ands r7, r0, #0x04 ldrne r7, [r0], #0x04 subne r1, r1, #0x04 subs r1, r1, #0x40 blt .Lcksumdata_bigloop_end /* Note: C flag clear if branch taken */ /* * Buffer is now quad aligned. Sum 64 bytes at a time. * Note: First ldrd is hoisted above the loop, together with * setting r6 to zero to avoid stalling for results in the * loop. (r7 is live, from above). */ ldrd r4, [r0], #0x08 mov r6, #0x00 .Lcksumdata_bigloop: pld [r0, #0x18] adds r2, r2, r6 adcs r2, r2, r7 ldrd r6, [r0], #0x08 adcs r2, r2, r4 adcs r2, r2, r5 ldrd r4, [r0], #0x08 adcs r2, r2, r6 adcs r2, r2, r7 ldrd r6, [r0], #0x08 adcs r2, r2, r4 adcs r2, r2, r5 ldrd r4, [r0], #0x08 adcs r2, r2, r6 adcs r2, r2, r7 pld [r0, #0x18] ldrd r6, [r0], #0x08 adcs r2, r2, r4 adcs r2, r2, r5 ldrd r4, [r0], #0x08 adcs r2, r2, r6 adcs r2, r2, r7 ldrd r6, [r0], #0x08 adcs r2, r2, r4 adcs r2, r2, r5 adc r2, r2, #0x00 subs r1, r1, #0x40 ldrdge r4, [r0], #0x08 bge .Lcksumdata_bigloop adds r2, r2, r6 /* r6/r7 still need summing */ .Lcksumdata_bigloop_end: adcs r2, r2, r7 adc r2, r2, #0x00 #else /* !_ARM_ARCH_5E */ subs r1, r1, #0x40 blt .Lcksumdata_bigloop_end .Lcksumdata_bigloop: ldmia r0!, {r3, r4, r5, r6} adds r2, r2, r3 adcs r2, r2, r4 adcs r2, r2, r5 ldmia r0!, {r3, r4, r5, r7} adcs r2, r2, r6 adcs r2, r2, r3 adcs r2, r2, r4 adcs r2, r2, r5 ldmia r0!, {r3, r4, r5, r6} adcs r2, r2, r7 adcs r2, r2, r3 adcs r2, r2, r4 adcs r2, r2, r5 ldmia r0!, {r3, r4, r5, r7} adcs r2, r2, r6 adcs r2, r2, r3 adcs r2, r2, r4 adcs r2, r2, r5 adcs r2, r2, r7 adc r2, r2, #0x00 subs r1, r1, #0x40 bge .Lcksumdata_bigloop .Lcksumdata_bigloop_end: #endif adds r1, r1, #0x40 RETeq cmp r1, #0x20 #ifdef _ARM_ARCH_5E ldrdge r4, [r0], #0x08 /* Avoid stalling pld and result */ blt .Lcksumdata_less_than_32 pld [r0, #0x18] ldrd r6, [r0], #0x08 adds r2, r2, r4 adcs r2, r2, r5 ldrd r4, [r0], #0x08 adcs r2, r2, r6 adcs r2, r2, r7 ldrd r6, [r0], #0x08 adcs r2, r2, r4 adcs r2, r2, r5 adcs r2, r2, r6 /* XXX: Unavoidable result stall */ adcs r2, r2, r7 #else blt .Lcksumdata_less_than_32 ldmia r0!, {r3, r4, r5, r6} adds r2, r2, r3 adcs r2, r2, r4 adcs r2, r2, r5 ldmia r0!, {r3, r4, r5, r7} adcs r2, r2, r6 adcs r2, r2, r3 adcs r2, r2, r4 adcs r2, r2, r5 adcs r2, r2, r7 #endif adc r2, r2, #0x00 subs r1, r1, #0x20 RETeq .Lcksumdata_less_than_32: /* There are less than 32 bytes left */ and r3, r1, #0x18 rsb r4, r3, #0x18 sub r1, r1, r3 adds r4, r4, r4, lsr #1 /* Side effect: Clear carry flag */ addne pc, pc, r4 nop /* * Note: We use ldm here, even on armv5e, since the combined issue/result * latencies for ldm and ldrd are the same. Using ldm avoids needless #ifdefs. */ /* At least 24 bytes remaining... */ ldmia r0!, {r4, r5} adcs r2, r2, r4 adcs r2, r2, r5 /* At least 16 bytes remaining... */ ldmia r0!, {r4, r5} adcs r2, r2, r4 adcs r2, r2, r5 /* At least 8 bytes remaining... */ ldmia r0!, {r4, r5} adcs r2, r2, r4 adcs r2, r2, r5 /* Less than 8 bytes remaining... */ adc r2, r2, #0x00 subs r1, r1, #0x04 blt .Lcksumdata_lessthan4 ldr r4, [r0], #0x04 sub r1, r1, #0x04 adds r2, r2, r4 adc r2, r2, #0x00 /* Deal with < 4 bytes remaining */ .Lcksumdata_lessthan4: adds r1, r1, #0x04 RETeq /* Deal with 1 to 3 remaining bytes, possibly misaligned */ .Lcksumdata_endgame: ldrb r3, [r0] /* Fetch first byte */ cmp r1, #0x02 ldrbge r4, [r0, #0x01] /* Fetch 2nd and 3rd as necessary */ movlt r4, #0x00 ldrbgt r5, [r0, #0x02] movle r5, #0x00 /* Combine the three bytes depending on endianness and alignment */ tst r0, #0x01 -#ifdef __ARMEB__ - orreq r3, r4, r3, lsl #8 - orreq r3, r3, r5, lsl #24 - orrne r3, r3, r4, lsl #8 - orrne r3, r3, r5, lsl #16 -#else orreq r3, r3, r4, lsl #8 orreq r3, r3, r5, lsl #16 orrne r3, r4, r3, lsl #8 orrne r3, r3, r5, lsl #24 -#endif adds r2, r2, r3 adc r2, r2, #0x00 RET END(L_cksumdata) Index: head/sys/arm/arm/support.S =================================================================== --- head/sys/arm/arm/support.S (revision 368152) +++ head/sys/arm/arm/support.S (revision 368153) @@ -1,2965 +1,2420 @@ /*- * Copyright (c) 2004 Olivier Houchard * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright 2003 Wasabi Systems, Inc. * All rights reserved. * * Written by Steve C. Woodford for Wasabi Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed for the NetBSD Project by * Wasabi Systems, Inc. * 4. The name of Wasabi Systems, Inc. may not be used to endorse * or promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1997 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Neil A. Carson and Mark Brinicombe * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "assym.inc" .syntax unified .L_arm_memcpy: .word _C_LABEL(_arm_memcpy) .L_arm_bzero: .word _C_LABEL(_arm_bzero) .L_min_memcpy_size: .word _C_LABEL(_min_memcpy_size) .L_min_bzero_size: .word _C_LABEL(_min_bzero_size) /* * memset: Sets a block of memory to the specified value * * On entry: * r0 - dest address * r1 - byte to write * r2 - number of bytes to write * * On exit: * r0 - dest address */ /* LINTSTUB: Func: void bzero(void *, size_t) */ ENTRY(bzero) ldr r3, .L_arm_bzero ldr r3, [r3] cmp r3, #0 beq .Lnormal0 ldr r2, .L_min_bzero_size ldr r2, [r2] cmp r1, r2 blt .Lnormal0 stmfd sp!, {r0, r1, lr} mov r2, #0 mov lr, pc mov pc, r3 cmp r0, #0 ldmfd sp!, {r0, r1, lr} RETeq .Lnormal0: mov r3, #0x00 b do_memset END(bzero) /* LINTSTUB: Func: void *memset(void *, int, size_t) */ ENTRY(memset) and r3, r1, #0xff /* We deal with bytes */ mov r1, r2 do_memset: cmp r1, #0x04 /* Do we have less than 4 bytes */ mov ip, r0 blt .Lmemset_lessthanfour /* Ok first we will word align the address */ ands r2, ip, #0x03 /* Get the bottom two bits */ bne .Lmemset_wordunaligned /* The address is not word aligned */ /* We are now word aligned */ .Lmemset_wordaligned: orr r3, r3, r3, lsl #8 /* Extend value to 16-bits */ #ifdef _ARM_ARCH_5E tst ip, #0x04 /* Quad-align for armv5e */ #else cmp r1, #0x10 #endif orr r3, r3, r3, lsl #16 /* Extend value to 32-bits */ #ifdef _ARM_ARCH_5E subne r1, r1, #0x04 /* Quad-align if necessary */ strne r3, [ip], #0x04 cmp r1, #0x10 #endif blt .Lmemset_loop4 /* If less than 16 then use words */ mov r2, r3 /* Duplicate data */ cmp r1, #0x80 /* If < 128 then skip the big loop */ blt .Lmemset_loop32 /* Do 128 bytes at a time */ .Lmemset_loop128: subs r1, r1, #0x80 #ifdef _ARM_ARCH_5E strdge r2, [ip], #0x08 strdge r2, [ip], #0x08 strdge r2, [ip], #0x08 strdge r2, [ip], #0x08 strdge r2, [ip], #0x08 strdge r2, [ip], #0x08 strdge r2, [ip], #0x08 strdge r2, [ip], #0x08 strdge r2, [ip], #0x08 strdge r2, [ip], #0x08 strdge r2, [ip], #0x08 strdge r2, [ip], #0x08 strdge r2, [ip], #0x08 strdge r2, [ip], #0x08 strdge r2, [ip], #0x08 strdge r2, [ip], #0x08 #else stmiage ip!, {r2-r3} stmiage ip!, {r2-r3} stmiage ip!, {r2-r3} stmiage ip!, {r2-r3} stmiage ip!, {r2-r3} stmiage ip!, {r2-r3} stmiage ip!, {r2-r3} stmiage ip!, {r2-r3} stmiage ip!, {r2-r3} stmiage ip!, {r2-r3} stmiage ip!, {r2-r3} stmiage ip!, {r2-r3} stmiage ip!, {r2-r3} stmiage ip!, {r2-r3} stmiage ip!, {r2-r3} stmiage ip!, {r2-r3} #endif bgt .Lmemset_loop128 RETeq /* Zero length so just exit */ add r1, r1, #0x80 /* Adjust for extra sub */ /* Do 32 bytes at a time */ .Lmemset_loop32: subs r1, r1, #0x20 #ifdef _ARM_ARCH_5E strdge r2, [ip], #0x08 strdge r2, [ip], #0x08 strdge r2, [ip], #0x08 strdge r2, [ip], #0x08 #else stmiage ip!, {r2-r3} stmiage ip!, {r2-r3} stmiage ip!, {r2-r3} stmiage ip!, {r2-r3} #endif bgt .Lmemset_loop32 RETeq /* Zero length so just exit */ adds r1, r1, #0x10 /* Partially adjust for extra sub */ /* Deal with 16 bytes or more */ #ifdef _ARM_ARCH_5E strdge r2, [ip], #0x08 strdge r2, [ip], #0x08 #else stmiage ip!, {r2-r3} stmiage ip!, {r2-r3} #endif RETeq /* Zero length so just exit */ addlt r1, r1, #0x10 /* Possibly adjust for extra sub */ /* We have at least 4 bytes so copy as words */ .Lmemset_loop4: subs r1, r1, #0x04 strge r3, [ip], #0x04 bgt .Lmemset_loop4 RETeq /* Zero length so just exit */ #ifdef _ARM_ARCH_5E /* Compensate for 64-bit alignment check */ adds r1, r1, #0x04 RETeq cmp r1, #2 #else cmp r1, #-2 #endif strb r3, [ip], #0x01 /* Set 1 byte */ strbge r3, [ip], #0x01 /* Set another byte */ strbgt r3, [ip] /* and a third */ RET /* Exit */ .Lmemset_wordunaligned: rsb r2, r2, #0x004 strb r3, [ip], #0x01 /* Set 1 byte */ cmp r2, #0x02 strbge r3, [ip], #0x01 /* Set another byte */ sub r1, r1, r2 strbgt r3, [ip], #0x01 /* and a third */ cmp r1, #0x04 /* More than 4 bytes left? */ bge .Lmemset_wordaligned /* Yup */ .Lmemset_lessthanfour: cmp r1, #0x00 RETeq /* Zero length so exit */ strb r3, [ip], #0x01 /* Set 1 byte */ cmp r1, #0x02 strbge r3, [ip], #0x01 /* Set another byte */ strbgt r3, [ip] /* and a third */ RET /* Exit */ EEND(memset) END(bzero) ENTRY(bcmp) mov ip, r0 cmp r2, #0x06 beq .Lmemcmp_6bytes mov r0, #0x00 /* Are both addresses aligned the same way? */ cmp r2, #0x00 eorsne r3, ip, r1 RETeq /* len == 0, or same addresses! */ tst r3, #0x03 subne r2, r2, #0x01 bne .Lmemcmp_bytewise2 /* Badly aligned. Do it the slow way */ /* Word-align the addresses, if necessary */ sub r3, r1, #0x05 ands r3, r3, #0x03 add r3, r3, r3, lsl #1 addne pc, pc, r3, lsl #3 nop /* Compare up to 3 bytes */ ldrb r0, [ip], #0x01 ldrb r3, [r1], #0x01 subs r0, r0, r3 RETne subs r2, r2, #0x01 RETeq /* Compare up to 2 bytes */ ldrb r0, [ip], #0x01 ldrb r3, [r1], #0x01 subs r0, r0, r3 RETne subs r2, r2, #0x01 RETeq /* Compare 1 byte */ ldrb r0, [ip], #0x01 ldrb r3, [r1], #0x01 subs r0, r0, r3 RETne subs r2, r2, #0x01 RETeq /* Compare 4 bytes at a time, if possible */ subs r2, r2, #0x04 bcc .Lmemcmp_bytewise .Lmemcmp_word_aligned: ldr r0, [ip], #0x04 ldr r3, [r1], #0x04 subs r2, r2, #0x04 cmpcs r0, r3 beq .Lmemcmp_word_aligned sub r0, r0, r3 /* Correct for extra subtraction, and check if done */ adds r2, r2, #0x04 cmpeq r0, #0x00 /* If done, did all bytes match? */ RETeq /* Yup. Just return */ /* Re-do the final word byte-wise */ sub ip, ip, #0x04 sub r1, r1, #0x04 .Lmemcmp_bytewise: add r2, r2, #0x03 .Lmemcmp_bytewise2: ldrb r0, [ip], #0x01 ldrb r3, [r1], #0x01 subs r2, r2, #0x01 cmpcs r0, r3 beq .Lmemcmp_bytewise2 sub r0, r0, r3 RET /* * 6 byte compares are very common, thanks to the network stack. * This code is hand-scheduled to reduce the number of stalls for * load results. Everything else being equal, this will be ~32% * faster than a byte-wise memcmp. */ .align 5 .Lmemcmp_6bytes: ldrb r3, [r1, #0x00] /* r3 = b2#0 */ ldrb r0, [ip, #0x00] /* r0 = b1#0 */ ldrb r2, [r1, #0x01] /* r2 = b2#1 */ subs r0, r0, r3 /* r0 = b1#0 - b2#0 */ ldrbeq r3, [ip, #0x01] /* r3 = b1#1 */ RETne /* Return if mismatch on #0 */ subs r0, r3, r2 /* r0 = b1#1 - b2#1 */ ldrbeq r3, [r1, #0x02] /* r3 = b2#2 */ ldrbeq r0, [ip, #0x02] /* r0 = b1#2 */ RETne /* Return if mismatch on #1 */ ldrb r2, [r1, #0x03] /* r2 = b2#3 */ subs r0, r0, r3 /* r0 = b1#2 - b2#2 */ ldrbeq r3, [ip, #0x03] /* r3 = b1#3 */ RETne /* Return if mismatch on #2 */ subs r0, r3, r2 /* r0 = b1#3 - b2#3 */ ldrbeq r3, [r1, #0x04] /* r3 = b2#4 */ ldrbeq r0, [ip, #0x04] /* r0 = b1#4 */ RETne /* Return if mismatch on #3 */ ldrb r2, [r1, #0x05] /* r2 = b2#5 */ subs r0, r0, r3 /* r0 = b1#4 - b2#4 */ ldrbeq r3, [ip, #0x05] /* r3 = b1#5 */ RETne /* Return if mismatch on #4 */ sub r0, r3, r2 /* r0 = b1#5 - b2#5 */ RET END(bcmp) ENTRY(bcopy) /* switch the source and destination registers */ eor r0, r1, r0 eor r1, r0, r1 eor r0, r1, r0 EENTRY(memmove) /* Do the buffers overlap? */ cmp r0, r1 RETeq /* Bail now if src/dst are the same */ subcc r3, r0, r1 /* if (dst > src) r3 = dst - src */ subcs r3, r1, r0 /* if (src > dsr) r3 = src - dst */ cmp r3, r2 /* if (r3 < len) we have an overlap */ bcc PIC_SYM(_C_LABEL(memcpy), PLT) /* Determine copy direction */ cmp r1, r0 bcc .Lmemmove_backwards moveq r0, #0 /* Quick abort for len=0 */ RETeq stmdb sp!, {r0, lr} /* memmove() returns dest addr */ subs r2, r2, #4 blt .Lmemmove_fl4 /* less than 4 bytes */ ands r12, r0, #3 bne .Lmemmove_fdestul /* oh unaligned destination addr */ ands r12, r1, #3 bne .Lmemmove_fsrcul /* oh unaligned source addr */ .Lmemmove_ft8: /* We have aligned source and destination */ subs r2, r2, #8 blt .Lmemmove_fl12 /* less than 12 bytes (4 from above) */ subs r2, r2, #0x14 blt .Lmemmove_fl32 /* less than 32 bytes (12 from above) */ stmdb sp!, {r4} /* borrow r4 */ /* blat 32 bytes at a time */ /* XXX for really big copies perhaps we should use more registers */ .Lmemmove_floop32: ldmia r1!, {r3, r4, r12, lr} stmia r0!, {r3, r4, r12, lr} ldmia r1!, {r3, r4, r12, lr} stmia r0!, {r3, r4, r12, lr} subs r2, r2, #0x20 bge .Lmemmove_floop32 cmn r2, #0x10 ldmiage r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */ stmiage r0!, {r3, r4, r12, lr} subge r2, r2, #0x10 ldmia sp!, {r4} /* return r4 */ .Lmemmove_fl32: adds r2, r2, #0x14 /* blat 12 bytes at a time */ .Lmemmove_floop12: ldmiage r1!, {r3, r12, lr} stmiage r0!, {r3, r12, lr} subsge r2, r2, #0x0c bge .Lmemmove_floop12 .Lmemmove_fl12: adds r2, r2, #8 blt .Lmemmove_fl4 subs r2, r2, #4 ldrlt r3, [r1], #4 strlt r3, [r0], #4 ldmiage r1!, {r3, r12} stmiage r0!, {r3, r12} subge r2, r2, #4 .Lmemmove_fl4: /* less than 4 bytes to go */ adds r2, r2, #4 ldmiaeq sp!, {r0, pc} /* done */ /* copy the crud byte at a time */ cmp r2, #2 ldrb r3, [r1], #1 strb r3, [r0], #1 ldrbge r3, [r1], #1 strbge r3, [r0], #1 ldrbgt r3, [r1], #1 strbgt r3, [r0], #1 ldmia sp!, {r0, pc} /* erg - unaligned destination */ .Lmemmove_fdestul: rsb r12, r12, #4 cmp r12, #2 /* align destination with byte copies */ ldrb r3, [r1], #1 strb r3, [r0], #1 ldrbge r3, [r1], #1 strbge r3, [r0], #1 ldrbgt r3, [r1], #1 strbgt r3, [r0], #1 subs r2, r2, r12 blt .Lmemmove_fl4 /* less the 4 bytes */ ands r12, r1, #3 beq .Lmemmove_ft8 /* we have an aligned source */ /* erg - unaligned source */ /* This is where it gets nasty ... */ .Lmemmove_fsrcul: bic r1, r1, #3 ldr lr, [r1], #4 cmp r12, #2 bgt .Lmemmove_fsrcul3 beq .Lmemmove_fsrcul2 cmp r2, #0x0c blt .Lmemmove_fsrcul1loop4 sub r2, r2, #0x0c stmdb sp!, {r4, r5} .Lmemmove_fsrcul1loop16: -#ifdef __ARMEB__ - mov r3, lr, lsl #8 -#else mov r3, lr, lsr #8 -#endif ldmia r1!, {r4, r5, r12, lr} -#ifdef __ARMEB__ - orr r3, r3, r4, lsr #24 - mov r4, r4, lsl #8 - orr r4, r4, r5, lsr #24 - mov r5, r5, lsl #8 - orr r5, r5, r12, lsr #24 - mov r12, r12, lsl #8 - orr r12, r12, lr, lsr #24 -#else orr r3, r3, r4, lsl #24 mov r4, r4, lsr #8 orr r4, r4, r5, lsl #24 mov r5, r5, lsr #8 orr r5, r5, r12, lsl #24 mov r12, r12, lsr #8 orr r12, r12, lr, lsl #24 -#endif stmia r0!, {r3-r5, r12} subs r2, r2, #0x10 bge .Lmemmove_fsrcul1loop16 ldmia sp!, {r4, r5} adds r2, r2, #0x0c blt .Lmemmove_fsrcul1l4 .Lmemmove_fsrcul1loop4: -#ifdef __ARMEB__ - mov r12, lr, lsl #8 -#else mov r12, lr, lsr #8 -#endif ldr lr, [r1], #4 -#ifdef __ARMEB__ - orr r12, r12, lr, lsr #24 -#else orr r12, r12, lr, lsl #24 -#endif str r12, [r0], #4 subs r2, r2, #4 bge .Lmemmove_fsrcul1loop4 .Lmemmove_fsrcul1l4: sub r1, r1, #3 b .Lmemmove_fl4 .Lmemmove_fsrcul2: cmp r2, #0x0c blt .Lmemmove_fsrcul2loop4 sub r2, r2, #0x0c stmdb sp!, {r4, r5} .Lmemmove_fsrcul2loop16: -#ifdef __ARMEB__ - mov r3, lr, lsl #16 -#else mov r3, lr, lsr #16 -#endif ldmia r1!, {r4, r5, r12, lr} -#ifdef __ARMEB__ - orr r3, r3, r4, lsr #16 - mov r4, r4, lsl #16 - orr r4, r4, r5, lsr #16 - mov r5, r5, lsl #16 - orr r5, r5, r12, lsr #16 - mov r12, r12, lsl #16 - orr r12, r12, lr, lsr #16 -#else orr r3, r3, r4, lsl #16 mov r4, r4, lsr #16 orr r4, r4, r5, lsl #16 mov r5, r5, lsr #16 orr r5, r5, r12, lsl #16 mov r12, r12, lsr #16 orr r12, r12, lr, lsl #16 -#endif stmia r0!, {r3-r5, r12} subs r2, r2, #0x10 bge .Lmemmove_fsrcul2loop16 ldmia sp!, {r4, r5} adds r2, r2, #0x0c blt .Lmemmove_fsrcul2l4 .Lmemmove_fsrcul2loop4: -#ifdef __ARMEB__ - mov r12, lr, lsl #16 -#else mov r12, lr, lsr #16 -#endif ldr lr, [r1], #4 -#ifdef __ARMEB__ - orr r12, r12, lr, lsr #16 -#else orr r12, r12, lr, lsl #16 -#endif str r12, [r0], #4 subs r2, r2, #4 bge .Lmemmove_fsrcul2loop4 .Lmemmove_fsrcul2l4: sub r1, r1, #2 b .Lmemmove_fl4 .Lmemmove_fsrcul3: cmp r2, #0x0c blt .Lmemmove_fsrcul3loop4 sub r2, r2, #0x0c stmdb sp!, {r4, r5} .Lmemmove_fsrcul3loop16: -#ifdef __ARMEB__ - mov r3, lr, lsl #24 -#else mov r3, lr, lsr #24 -#endif ldmia r1!, {r4, r5, r12, lr} -#ifdef __ARMEB__ - orr r3, r3, r4, lsr #8 - mov r4, r4, lsl #24 - orr r4, r4, r5, lsr #8 - mov r5, r5, lsl #24 - orr r5, r5, r12, lsr #8 - mov r12, r12, lsl #24 - orr r12, r12, lr, lsr #8 -#else orr r3, r3, r4, lsl #8 mov r4, r4, lsr #24 orr r4, r4, r5, lsl #8 mov r5, r5, lsr #24 orr r5, r5, r12, lsl #8 mov r12, r12, lsr #24 orr r12, r12, lr, lsl #8 -#endif stmia r0!, {r3-r5, r12} subs r2, r2, #0x10 bge .Lmemmove_fsrcul3loop16 ldmia sp!, {r4, r5} adds r2, r2, #0x0c blt .Lmemmove_fsrcul3l4 .Lmemmove_fsrcul3loop4: -#ifdef __ARMEB__ - mov r12, lr, lsl #24 -#else mov r12, lr, lsr #24 -#endif ldr lr, [r1], #4 -#ifdef __ARMEB__ - orr r12, r12, lr, lsr #8 -#else orr r12, r12, lr, lsl #8 -#endif str r12, [r0], #4 subs r2, r2, #4 bge .Lmemmove_fsrcul3loop4 .Lmemmove_fsrcul3l4: sub r1, r1, #1 b .Lmemmove_fl4 .Lmemmove_backwards: add r1, r1, r2 add r0, r0, r2 subs r2, r2, #4 blt .Lmemmove_bl4 /* less than 4 bytes */ ands r12, r0, #3 bne .Lmemmove_bdestul /* oh unaligned destination addr */ ands r12, r1, #3 bne .Lmemmove_bsrcul /* oh unaligned source addr */ .Lmemmove_bt8: /* We have aligned source and destination */ subs r2, r2, #8 blt .Lmemmove_bl12 /* less than 12 bytes (4 from above) */ stmdb sp!, {r4, lr} subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */ blt .Lmemmove_bl32 /* blat 32 bytes at a time */ /* XXX for really big copies perhaps we should use more registers */ .Lmemmove_bloop32: ldmdb r1!, {r3, r4, r12, lr} stmdb r0!, {r3, r4, r12, lr} ldmdb r1!, {r3, r4, r12, lr} stmdb r0!, {r3, r4, r12, lr} subs r2, r2, #0x20 bge .Lmemmove_bloop32 .Lmemmove_bl32: cmn r2, #0x10 ldmdbge r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */ stmdbge r0!, {r3, r4, r12, lr} subge r2, r2, #0x10 adds r2, r2, #0x14 ldmdbge r1!, {r3, r12, lr} /* blat a remaining 12 bytes */ stmdbge r0!, {r3, r12, lr} subge r2, r2, #0x0c ldmia sp!, {r4, lr} .Lmemmove_bl12: adds r2, r2, #8 blt .Lmemmove_bl4 subs r2, r2, #4 ldrlt r3, [r1, #-4]! strlt r3, [r0, #-4]! ldmdbge r1!, {r3, r12} stmdbge r0!, {r3, r12} subge r2, r2, #4 .Lmemmove_bl4: /* less than 4 bytes to go */ adds r2, r2, #4 RETeq /* done */ /* copy the crud byte at a time */ cmp r2, #2 ldrb r3, [r1, #-1]! strb r3, [r0, #-1]! ldrbge r3, [r1, #-1]! strbge r3, [r0, #-1]! ldrbgt r3, [r1, #-1]! strbgt r3, [r0, #-1]! RET /* erg - unaligned destination */ .Lmemmove_bdestul: cmp r12, #2 /* align destination with byte copies */ ldrb r3, [r1, #-1]! strb r3, [r0, #-1]! ldrbge r3, [r1, #-1]! strbge r3, [r0, #-1]! ldrbgt r3, [r1, #-1]! strbgt r3, [r0, #-1]! subs r2, r2, r12 blt .Lmemmove_bl4 /* less than 4 bytes to go */ ands r12, r1, #3 beq .Lmemmove_bt8 /* we have an aligned source */ /* erg - unaligned source */ /* This is where it gets nasty ... */ .Lmemmove_bsrcul: bic r1, r1, #3 ldr r3, [r1, #0] cmp r12, #2 blt .Lmemmove_bsrcul1 beq .Lmemmove_bsrcul2 cmp r2, #0x0c blt .Lmemmove_bsrcul3loop4 sub r2, r2, #0x0c stmdb sp!, {r4, r5, lr} .Lmemmove_bsrcul3loop16: -#ifdef __ARMEB__ - mov lr, r3, lsr #8 -#else mov lr, r3, lsl #8 -#endif ldmdb r1!, {r3-r5, r12} -#ifdef __ARMEB__ - orr lr, lr, r12, lsl #24 - mov r12, r12, lsr #8 - orr r12, r12, r5, lsl #24 - mov r5, r5, lsr #8 - orr r5, r5, r4, lsl #24 - mov r4, r4, lsr #8 - orr r4, r4, r3, lsl #24 -#else orr lr, lr, r12, lsr #24 mov r12, r12, lsl #8 orr r12, r12, r5, lsr #24 mov r5, r5, lsl #8 orr r5, r5, r4, lsr #24 mov r4, r4, lsl #8 orr r4, r4, r3, lsr #24 -#endif stmdb r0!, {r4, r5, r12, lr} subs r2, r2, #0x10 bge .Lmemmove_bsrcul3loop16 ldmia sp!, {r4, r5, lr} adds r2, r2, #0x0c blt .Lmemmove_bsrcul3l4 .Lmemmove_bsrcul3loop4: -#ifdef __ARMEB__ - mov r12, r3, lsr #8 -#else mov r12, r3, lsl #8 -#endif ldr r3, [r1, #-4]! -#ifdef __ARMEB__ - orr r12, r12, r3, lsl #24 -#else orr r12, r12, r3, lsr #24 -#endif str r12, [r0, #-4]! subs r2, r2, #4 bge .Lmemmove_bsrcul3loop4 .Lmemmove_bsrcul3l4: add r1, r1, #3 b .Lmemmove_bl4 .Lmemmove_bsrcul2: cmp r2, #0x0c blt .Lmemmove_bsrcul2loop4 sub r2, r2, #0x0c stmdb sp!, {r4, r5, lr} .Lmemmove_bsrcul2loop16: -#ifdef __ARMEB__ - mov lr, r3, lsr #16 -#else mov lr, r3, lsl #16 -#endif ldmdb r1!, {r3-r5, r12} -#ifdef __ARMEB__ - orr lr, lr, r12, lsl #16 - mov r12, r12, lsr #16 - orr r12, r12, r5, lsl #16 - mov r5, r5, lsr #16 - orr r5, r5, r4, lsl #16 - mov r4, r4, lsr #16 - orr r4, r4, r3, lsl #16 -#else orr lr, lr, r12, lsr #16 mov r12, r12, lsl #16 orr r12, r12, r5, lsr #16 mov r5, r5, lsl #16 orr r5, r5, r4, lsr #16 mov r4, r4, lsl #16 orr r4, r4, r3, lsr #16 -#endif stmdb r0!, {r4, r5, r12, lr} subs r2, r2, #0x10 bge .Lmemmove_bsrcul2loop16 ldmia sp!, {r4, r5, lr} adds r2, r2, #0x0c blt .Lmemmove_bsrcul2l4 .Lmemmove_bsrcul2loop4: -#ifdef __ARMEB__ - mov r12, r3, lsr #16 -#else mov r12, r3, lsl #16 -#endif ldr r3, [r1, #-4]! -#ifdef __ARMEB__ - orr r12, r12, r3, lsl #16 -#else orr r12, r12, r3, lsr #16 -#endif str r12, [r0, #-4]! subs r2, r2, #4 bge .Lmemmove_bsrcul2loop4 .Lmemmove_bsrcul2l4: add r1, r1, #2 b .Lmemmove_bl4 .Lmemmove_bsrcul1: cmp r2, #0x0c blt .Lmemmove_bsrcul1loop4 sub r2, r2, #0x0c stmdb sp!, {r4, r5, lr} .Lmemmove_bsrcul1loop32: -#ifdef __ARMEB__ - mov lr, r3, lsr #24 -#else mov lr, r3, lsl #24 -#endif ldmdb r1!, {r3-r5, r12} -#ifdef __ARMEB__ - orr lr, lr, r12, lsl #8 - mov r12, r12, lsr #24 - orr r12, r12, r5, lsl #8 - mov r5, r5, lsr #24 - orr r5, r5, r4, lsl #8 - mov r4, r4, lsr #24 - orr r4, r4, r3, lsl #8 -#else orr lr, lr, r12, lsr #8 mov r12, r12, lsl #24 orr r12, r12, r5, lsr #8 mov r5, r5, lsl #24 orr r5, r5, r4, lsr #8 mov r4, r4, lsl #24 orr r4, r4, r3, lsr #8 -#endif stmdb r0!, {r4, r5, r12, lr} subs r2, r2, #0x10 bge .Lmemmove_bsrcul1loop32 ldmia sp!, {r4, r5, lr} adds r2, r2, #0x0c blt .Lmemmove_bsrcul1l4 .Lmemmove_bsrcul1loop4: -#ifdef __ARMEB__ - mov r12, r3, lsr #24 -#else mov r12, r3, lsl #24 -#endif ldr r3, [r1, #-4]! -#ifdef __ARMEB__ - orr r12, r12, r3, lsl #8 -#else orr r12, r12, r3, lsr #8 -#endif str r12, [r0, #-4]! subs r2, r2, #4 bge .Lmemmove_bsrcul1loop4 .Lmemmove_bsrcul1l4: add r1, r1, #1 b .Lmemmove_bl4 EEND(memmove) END(bcopy) #if !defined(_ARM_ARCH_5E) ENTRY(memcpy) /* save leaf functions having to store this away */ /* Do not check arm_memcpy if we're running from flash */ #if defined(FLASHADDR) && defined(PHYSADDR) #if FLASHADDR > PHYSADDR ldr r3, =FLASHADDR cmp r3, pc bls .Lnormal #else ldr r3, =FLASHADDR cmp r3, pc bhi .Lnormal #endif #endif ldr r3, .L_arm_memcpy ldr r3, [r3] cmp r3, #0 beq .Lnormal ldr r3, .L_min_memcpy_size ldr r3, [r3] cmp r2, r3 blt .Lnormal stmfd sp!, {r0-r2, r4, lr} mov r3, #0 ldr r4, .L_arm_memcpy mov lr, pc ldr pc, [r4] cmp r0, #0 ldmfd sp!, {r0-r2, r4, lr} RETeq .Lnormal: stmdb sp!, {r0, lr} /* memcpy() returns dest addr */ subs r2, r2, #4 blt .Lmemcpy_l4 /* less than 4 bytes */ ands r12, r0, #3 bne .Lmemcpy_destul /* oh unaligned destination addr */ ands r12, r1, #3 bne .Lmemcpy_srcul /* oh unaligned source addr */ .Lmemcpy_t8: /* We have aligned source and destination */ subs r2, r2, #8 blt .Lmemcpy_l12 /* less than 12 bytes (4 from above) */ subs r2, r2, #0x14 blt .Lmemcpy_l32 /* less than 32 bytes (12 from above) */ stmdb sp!, {r4} /* borrow r4 */ /* blat 32 bytes at a time */ /* XXX for really big copies perhaps we should use more registers */ .Lmemcpy_loop32: ldmia r1!, {r3, r4, r12, lr} stmia r0!, {r3, r4, r12, lr} ldmia r1!, {r3, r4, r12, lr} stmia r0!, {r3, r4, r12, lr} subs r2, r2, #0x20 bge .Lmemcpy_loop32 cmn r2, #0x10 ldmiage r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */ stmiage r0!, {r3, r4, r12, lr} subge r2, r2, #0x10 ldmia sp!, {r4} /* return r4 */ .Lmemcpy_l32: adds r2, r2, #0x14 /* blat 12 bytes at a time */ .Lmemcpy_loop12: ldmiage r1!, {r3, r12, lr} stmiage r0!, {r3, r12, lr} subsge r2, r2, #0x0c bge .Lmemcpy_loop12 .Lmemcpy_l12: adds r2, r2, #8 blt .Lmemcpy_l4 subs r2, r2, #4 ldrlt r3, [r1], #4 strlt r3, [r0], #4 ldmiage r1!, {r3, r12} stmiage r0!, {r3, r12} subge r2, r2, #4 .Lmemcpy_l4: /* less than 4 bytes to go */ adds r2, r2, #4 #ifdef __APCS_26_ ldmiaeq sp!, {r0, pc}^ /* done */ #else ldmiaeq sp!, {r0, pc} /* done */ #endif /* copy the crud byte at a time */ cmp r2, #2 ldrb r3, [r1], #1 strb r3, [r0], #1 ldrbge r3, [r1], #1 strbge r3, [r0], #1 ldrbgt r3, [r1], #1 strbgt r3, [r0], #1 ldmia sp!, {r0, pc} /* erg - unaligned destination */ .Lmemcpy_destul: rsb r12, r12, #4 cmp r12, #2 /* align destination with byte copies */ ldrb r3, [r1], #1 strb r3, [r0], #1 ldrbge r3, [r1], #1 strbge r3, [r0], #1 ldrbgt r3, [r1], #1 strbgt r3, [r0], #1 subs r2, r2, r12 blt .Lmemcpy_l4 /* less the 4 bytes */ ands r12, r1, #3 beq .Lmemcpy_t8 /* we have an aligned source */ /* erg - unaligned source */ /* This is where it gets nasty ... */ .Lmemcpy_srcul: bic r1, r1, #3 ldr lr, [r1], #4 cmp r12, #2 bgt .Lmemcpy_srcul3 beq .Lmemcpy_srcul2 cmp r2, #0x0c blt .Lmemcpy_srcul1loop4 sub r2, r2, #0x0c stmdb sp!, {r4, r5} .Lmemcpy_srcul1loop16: mov r3, lr, lsr #8 ldmia r1!, {r4, r5, r12, lr} orr r3, r3, r4, lsl #24 mov r4, r4, lsr #8 orr r4, r4, r5, lsl #24 mov r5, r5, lsr #8 orr r5, r5, r12, lsl #24 mov r12, r12, lsr #8 orr r12, r12, lr, lsl #24 stmia r0!, {r3-r5, r12} subs r2, r2, #0x10 bge .Lmemcpy_srcul1loop16 ldmia sp!, {r4, r5} adds r2, r2, #0x0c blt .Lmemcpy_srcul1l4 .Lmemcpy_srcul1loop4: mov r12, lr, lsr #8 ldr lr, [r1], #4 orr r12, r12, lr, lsl #24 str r12, [r0], #4 subs r2, r2, #4 bge .Lmemcpy_srcul1loop4 .Lmemcpy_srcul1l4: sub r1, r1, #3 b .Lmemcpy_l4 .Lmemcpy_srcul2: cmp r2, #0x0c blt .Lmemcpy_srcul2loop4 sub r2, r2, #0x0c stmdb sp!, {r4, r5} .Lmemcpy_srcul2loop16: mov r3, lr, lsr #16 ldmia r1!, {r4, r5, r12, lr} orr r3, r3, r4, lsl #16 mov r4, r4, lsr #16 orr r4, r4, r5, lsl #16 mov r5, r5, lsr #16 orr r5, r5, r12, lsl #16 mov r12, r12, lsr #16 orr r12, r12, lr, lsl #16 stmia r0!, {r3-r5, r12} subs r2, r2, #0x10 bge .Lmemcpy_srcul2loop16 ldmia sp!, {r4, r5} adds r2, r2, #0x0c blt .Lmemcpy_srcul2l4 .Lmemcpy_srcul2loop4: mov r12, lr, lsr #16 ldr lr, [r1], #4 orr r12, r12, lr, lsl #16 str r12, [r0], #4 subs r2, r2, #4 bge .Lmemcpy_srcul2loop4 .Lmemcpy_srcul2l4: sub r1, r1, #2 b .Lmemcpy_l4 .Lmemcpy_srcul3: cmp r2, #0x0c blt .Lmemcpy_srcul3loop4 sub r2, r2, #0x0c stmdb sp!, {r4, r5} .Lmemcpy_srcul3loop16: mov r3, lr, lsr #24 ldmia r1!, {r4, r5, r12, lr} orr r3, r3, r4, lsl #8 mov r4, r4, lsr #24 orr r4, r4, r5, lsl #8 mov r5, r5, lsr #24 orr r5, r5, r12, lsl #8 mov r12, r12, lsr #24 orr r12, r12, lr, lsl #8 stmia r0!, {r3-r5, r12} subs r2, r2, #0x10 bge .Lmemcpy_srcul3loop16 ldmia sp!, {r4, r5} adds r2, r2, #0x0c blt .Lmemcpy_srcul3l4 .Lmemcpy_srcul3loop4: mov r12, lr, lsr #24 ldr lr, [r1], #4 orr r12, r12, lr, lsl #8 str r12, [r0], #4 subs r2, r2, #4 bge .Lmemcpy_srcul3loop4 .Lmemcpy_srcul3l4: sub r1, r1, #1 b .Lmemcpy_l4 END(memcpy) #else /* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */ ENTRY(memcpy) pld [r1] cmp r2, #0x0c ble .Lmemcpy_short /* <= 12 bytes */ #ifdef FLASHADDR #if FLASHADDR > PHYSADDR ldr r3, =FLASHADDR cmp r3, pc bls .Lnormal #else ldr r3, =FLASHADDR cmp r3, pc bhi .Lnormal #endif #endif ldr r3, .L_arm_memcpy ldr r3, [r3] cmp r3, #0 beq .Lnormal ldr r3, .L_min_memcpy_size ldr r3, [r3] cmp r2, r3 blt .Lnormal stmfd sp!, {r0-r2, r4, lr} mov r3, #0 ldr r4, .L_arm_memcpy mov lr, pc ldr pc, [r4] cmp r0, #0 ldmfd sp!, {r0-r2, r4, lr} RETeq .Lnormal: mov r3, r0 /* We must not clobber r0 */ /* Word-align the destination buffer */ ands ip, r3, #0x03 /* Already word aligned? */ beq .Lmemcpy_wordaligned /* Yup */ cmp ip, #0x02 ldrb ip, [r1], #0x01 sub r2, r2, #0x01 strb ip, [r3], #0x01 ldrble ip, [r1], #0x01 suble r2, r2, #0x01 strble ip, [r3], #0x01 ldrblt ip, [r1], #0x01 sublt r2, r2, #0x01 strblt ip, [r3], #0x01 /* Destination buffer is now word aligned */ .Lmemcpy_wordaligned: ands ip, r1, #0x03 /* Is src also word-aligned? */ bne .Lmemcpy_bad_align /* Nope. Things just got bad */ /* Quad-align the destination buffer */ tst r3, #0x07 /* Already quad aligned? */ ldrne ip, [r1], #0x04 stmfd sp!, {r4-r9} /* Free up some registers */ subne r2, r2, #0x04 strne ip, [r3], #0x04 /* Destination buffer quad aligned, source is at least word aligned */ subs r2, r2, #0x80 blt .Lmemcpy_w_lessthan128 /* Copy 128 bytes at a time */ .Lmemcpy_w_loop128: ldr r4, [r1], #0x04 /* LD:00-03 */ ldr r5, [r1], #0x04 /* LD:04-07 */ pld [r1, #0x18] /* Prefetch 0x20 */ ldr r6, [r1], #0x04 /* LD:08-0b */ ldr r7, [r1], #0x04 /* LD:0c-0f */ ldr r8, [r1], #0x04 /* LD:10-13 */ ldr r9, [r1], #0x04 /* LD:14-17 */ strd r4, [r3], #0x08 /* ST:00-07 */ ldr r4, [r1], #0x04 /* LD:18-1b */ ldr r5, [r1], #0x04 /* LD:1c-1f */ strd r6, [r3], #0x08 /* ST:08-0f */ ldr r6, [r1], #0x04 /* LD:20-23 */ ldr r7, [r1], #0x04 /* LD:24-27 */ pld [r1, #0x18] /* Prefetch 0x40 */ strd r8, [r3], #0x08 /* ST:10-17 */ ldr r8, [r1], #0x04 /* LD:28-2b */ ldr r9, [r1], #0x04 /* LD:2c-2f */ strd r4, [r3], #0x08 /* ST:18-1f */ ldr r4, [r1], #0x04 /* LD:30-33 */ ldr r5, [r1], #0x04 /* LD:34-37 */ strd r6, [r3], #0x08 /* ST:20-27 */ ldr r6, [r1], #0x04 /* LD:38-3b */ ldr r7, [r1], #0x04 /* LD:3c-3f */ strd r8, [r3], #0x08 /* ST:28-2f */ ldr r8, [r1], #0x04 /* LD:40-43 */ ldr r9, [r1], #0x04 /* LD:44-47 */ pld [r1, #0x18] /* Prefetch 0x60 */ strd r4, [r3], #0x08 /* ST:30-37 */ ldr r4, [r1], #0x04 /* LD:48-4b */ ldr r5, [r1], #0x04 /* LD:4c-4f */ strd r6, [r3], #0x08 /* ST:38-3f */ ldr r6, [r1], #0x04 /* LD:50-53 */ ldr r7, [r1], #0x04 /* LD:54-57 */ strd r8, [r3], #0x08 /* ST:40-47 */ ldr r8, [r1], #0x04 /* LD:58-5b */ ldr r9, [r1], #0x04 /* LD:5c-5f */ strd r4, [r3], #0x08 /* ST:48-4f */ ldr r4, [r1], #0x04 /* LD:60-63 */ ldr r5, [r1], #0x04 /* LD:64-67 */ pld [r1, #0x18] /* Prefetch 0x80 */ strd r6, [r3], #0x08 /* ST:50-57 */ ldr r6, [r1], #0x04 /* LD:68-6b */ ldr r7, [r1], #0x04 /* LD:6c-6f */ strd r8, [r3], #0x08 /* ST:58-5f */ ldr r8, [r1], #0x04 /* LD:70-73 */ ldr r9, [r1], #0x04 /* LD:74-77 */ strd r4, [r3], #0x08 /* ST:60-67 */ ldr r4, [r1], #0x04 /* LD:78-7b */ ldr r5, [r1], #0x04 /* LD:7c-7f */ strd r6, [r3], #0x08 /* ST:68-6f */ strd r8, [r3], #0x08 /* ST:70-77 */ subs r2, r2, #0x80 strd r4, [r3], #0x08 /* ST:78-7f */ bge .Lmemcpy_w_loop128 .Lmemcpy_w_lessthan128: adds r2, r2, #0x80 /* Adjust for extra sub */ ldmfdeq sp!, {r4-r9} RETeq /* Return now if done */ subs r2, r2, #0x20 blt .Lmemcpy_w_lessthan32 /* Copy 32 bytes at a time */ .Lmemcpy_w_loop32: ldr r4, [r1], #0x04 ldr r5, [r1], #0x04 pld [r1, #0x18] ldr r6, [r1], #0x04 ldr r7, [r1], #0x04 ldr r8, [r1], #0x04 ldr r9, [r1], #0x04 strd r4, [r3], #0x08 ldr r4, [r1], #0x04 ldr r5, [r1], #0x04 strd r6, [r3], #0x08 strd r8, [r3], #0x08 subs r2, r2, #0x20 strd r4, [r3], #0x08 bge .Lmemcpy_w_loop32 .Lmemcpy_w_lessthan32: adds r2, r2, #0x20 /* Adjust for extra sub */ ldmfdeq sp!, {r4-r9} RETeq /* Return now if done */ and r4, r2, #0x18 rsbs r4, r4, #0x18 addne pc, pc, r4, lsl #1 nop /* At least 24 bytes remaining */ ldr r4, [r1], #0x04 ldr r5, [r1], #0x04 sub r2, r2, #0x08 strd r4, [r3], #0x08 /* At least 16 bytes remaining */ ldr r4, [r1], #0x04 ldr r5, [r1], #0x04 sub r2, r2, #0x08 strd r4, [r3], #0x08 /* At least 8 bytes remaining */ ldr r4, [r1], #0x04 ldr r5, [r1], #0x04 subs r2, r2, #0x08 strd r4, [r3], #0x08 /* Less than 8 bytes remaining */ ldmfd sp!, {r4-r9} RETeq /* Return now if done */ subs r2, r2, #0x04 ldrge ip, [r1], #0x04 strge ip, [r3], #0x04 RETeq /* Return now if done */ addlt r2, r2, #0x04 ldrb ip, [r1], #0x01 cmp r2, #0x02 ldrbge r2, [r1], #0x01 strb ip, [r3], #0x01 ldrbgt ip, [r1] strbge r2, [r3], #0x01 strbgt ip, [r3] RET /* Place a literal pool here for the above ldr instructions to use */ .ltorg /* * At this point, it has not been possible to word align both buffers. * The destination buffer is word aligned, but the source buffer is not. */ .Lmemcpy_bad_align: stmfd sp!, {r4-r7} bic r1, r1, #0x03 cmp ip, #2 ldr ip, [r1], #0x04 bgt .Lmemcpy_bad3 beq .Lmemcpy_bad2 b .Lmemcpy_bad1 .Lmemcpy_bad1_loop16: -#ifdef __ARMEB__ - mov r4, ip, lsl #8 -#else mov r4, ip, lsr #8 -#endif ldr r5, [r1], #0x04 pld [r1, #0x018] ldr r6, [r1], #0x04 ldr r7, [r1], #0x04 ldr ip, [r1], #0x04 -#ifdef __ARMEB__ - orr r4, r4, r5, lsr #24 - mov r5, r5, lsl #8 - orr r5, r5, r6, lsr #24 - mov r6, r6, lsl #8 - orr r6, r6, r7, lsr #24 - mov r7, r7, lsl #8 - orr r7, r7, ip, lsr #24 -#else orr r4, r4, r5, lsl #24 mov r5, r5, lsr #8 orr r5, r5, r6, lsl #24 mov r6, r6, lsr #8 orr r6, r6, r7, lsl #24 mov r7, r7, lsr #8 orr r7, r7, ip, lsl #24 -#endif str r4, [r3], #0x04 str r5, [r3], #0x04 str r6, [r3], #0x04 str r7, [r3], #0x04 .Lmemcpy_bad1: subs r2, r2, #0x10 bge .Lmemcpy_bad1_loop16 adds r2, r2, #0x10 ldmfdeq sp!, {r4-r7} RETeq /* Return now if done */ subs r2, r2, #0x04 sublt r1, r1, #0x03 blt .Lmemcpy_bad_done .Lmemcpy_bad1_loop4: -#ifdef __ARMEB__ - mov r4, ip, lsl #8 -#else mov r4, ip, lsr #8 -#endif ldr ip, [r1], #0x04 subs r2, r2, #0x04 -#ifdef __ARMEB__ - orr r4, r4, ip, lsr #24 -#else orr r4, r4, ip, lsl #24 -#endif str r4, [r3], #0x04 bge .Lmemcpy_bad1_loop4 sub r1, r1, #0x03 b .Lmemcpy_bad_done .Lmemcpy_bad2_loop16: -#ifdef __ARMEB__ - mov r4, ip, lsl #16 -#else mov r4, ip, lsr #16 -#endif ldr r5, [r1], #0x04 pld [r1, #0x018] ldr r6, [r1], #0x04 ldr r7, [r1], #0x04 ldr ip, [r1], #0x04 -#ifdef __ARMEB__ - orr r4, r4, r5, lsr #16 - mov r5, r5, lsl #16 - orr r5, r5, r6, lsr #16 - mov r6, r6, lsl #16 - orr r6, r6, r7, lsr #16 - mov r7, r7, lsl #16 - orr r7, r7, ip, lsr #16 -#else orr r4, r4, r5, lsl #16 mov r5, r5, lsr #16 orr r5, r5, r6, lsl #16 mov r6, r6, lsr #16 orr r6, r6, r7, lsl #16 mov r7, r7, lsr #16 orr r7, r7, ip, lsl #16 -#endif str r4, [r3], #0x04 str r5, [r3], #0x04 str r6, [r3], #0x04 str r7, [r3], #0x04 .Lmemcpy_bad2: subs r2, r2, #0x10 bge .Lmemcpy_bad2_loop16 adds r2, r2, #0x10 ldmfdeq sp!, {r4-r7} RETeq /* Return now if done */ subs r2, r2, #0x04 sublt r1, r1, #0x02 blt .Lmemcpy_bad_done .Lmemcpy_bad2_loop4: -#ifdef __ARMEB__ - mov r4, ip, lsl #16 -#else mov r4, ip, lsr #16 -#endif ldr ip, [r1], #0x04 subs r2, r2, #0x04 -#ifdef __ARMEB__ - orr r4, r4, ip, lsr #16 -#else orr r4, r4, ip, lsl #16 -#endif str r4, [r3], #0x04 bge .Lmemcpy_bad2_loop4 sub r1, r1, #0x02 b .Lmemcpy_bad_done .Lmemcpy_bad3_loop16: -#ifdef __ARMEB__ - mov r4, ip, lsl #24 -#else mov r4, ip, lsr #24 -#endif ldr r5, [r1], #0x04 pld [r1, #0x018] ldr r6, [r1], #0x04 ldr r7, [r1], #0x04 ldr ip, [r1], #0x04 -#ifdef __ARMEB__ - orr r4, r4, r5, lsr #8 - mov r5, r5, lsl #24 - orr r5, r5, r6, lsr #8 - mov r6, r6, lsl #24 - orr r6, r6, r7, lsr #8 - mov r7, r7, lsl #24 - orr r7, r7, ip, lsr #8 -#else orr r4, r4, r5, lsl #8 mov r5, r5, lsr #24 orr r5, r5, r6, lsl #8 mov r6, r6, lsr #24 orr r6, r6, r7, lsl #8 mov r7, r7, lsr #24 orr r7, r7, ip, lsl #8 -#endif str r4, [r3], #0x04 str r5, [r3], #0x04 str r6, [r3], #0x04 str r7, [r3], #0x04 .Lmemcpy_bad3: subs r2, r2, #0x10 bge .Lmemcpy_bad3_loop16 adds r2, r2, #0x10 ldmfdeq sp!, {r4-r7} RETeq /* Return now if done */ subs r2, r2, #0x04 sublt r1, r1, #0x01 blt .Lmemcpy_bad_done .Lmemcpy_bad3_loop4: -#ifdef __ARMEB__ - mov r4, ip, lsl #24 -#else mov r4, ip, lsr #24 -#endif ldr ip, [r1], #0x04 subs r2, r2, #0x04 -#ifdef __ARMEB__ - orr r4, r4, ip, lsr #8 -#else orr r4, r4, ip, lsl #8 -#endif str r4, [r3], #0x04 bge .Lmemcpy_bad3_loop4 sub r1, r1, #0x01 .Lmemcpy_bad_done: ldmfd sp!, {r4-r7} adds r2, r2, #0x04 RETeq ldrb ip, [r1], #0x01 cmp r2, #0x02 ldrbge r2, [r1], #0x01 strb ip, [r3], #0x01 ldrbgt ip, [r1] strbge r2, [r3], #0x01 strbgt ip, [r3] RET /* * Handle short copies (less than 16 bytes), possibly misaligned. * Some of these are *very* common, thanks to the network stack, * and so are handled specially. */ .Lmemcpy_short: add pc, pc, r2, lsl #2 nop RET /* 0x00 */ b .Lmemcpy_bytewise /* 0x01 */ b .Lmemcpy_bytewise /* 0x02 */ b .Lmemcpy_bytewise /* 0x03 */ b .Lmemcpy_4 /* 0x04 */ b .Lmemcpy_bytewise /* 0x05 */ b .Lmemcpy_6 /* 0x06 */ b .Lmemcpy_bytewise /* 0x07 */ b .Lmemcpy_8 /* 0x08 */ b .Lmemcpy_bytewise /* 0x09 */ b .Lmemcpy_bytewise /* 0x0a */ b .Lmemcpy_bytewise /* 0x0b */ b .Lmemcpy_c /* 0x0c */ .Lmemcpy_bytewise: mov r3, r0 /* We must not clobber r0 */ ldrb ip, [r1], #0x01 1: subs r2, r2, #0x01 strb ip, [r3], #0x01 ldrbne ip, [r1], #0x01 bne 1b RET /****************************************************************************** * Special case for 4 byte copies */ #define LMEMCPY_4_LOG2 6 /* 64 bytes */ #define LMEMCPY_4_PAD .align LMEMCPY_4_LOG2 LMEMCPY_4_PAD .Lmemcpy_4: and r2, r1, #0x03 orr r2, r2, r0, lsl #2 ands r2, r2, #0x0f sub r3, pc, #0x14 addne pc, r3, r2, lsl #LMEMCPY_4_LOG2 /* * 0000: dst is 32-bit aligned, src is 32-bit aligned */ ldr r2, [r1] str r2, [r0] RET LMEMCPY_4_PAD /* * 0001: dst is 32-bit aligned, src is 8-bit aligned */ ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ ldr r2, [r1, #3] /* BE:r2 = 3xxx LE:r2 = xxx3 */ -#ifdef __ARMEB__ - mov r3, r3, lsl #8 /* r3 = 012. */ - orr r3, r3, r2, lsr #24 /* r3 = 0123 */ -#else mov r3, r3, lsr #8 /* r3 = .210 */ orr r3, r3, r2, lsl #24 /* r3 = 3210 */ -#endif str r3, [r0] RET LMEMCPY_4_PAD /* * 0010: dst is 32-bit aligned, src is 16-bit aligned */ -#ifdef __ARMEB__ - ldrh r3, [r1] - ldrh r2, [r1, #0x02] -#else ldrh r3, [r1, #0x02] ldrh r2, [r1] -#endif orr r3, r2, r3, lsl #16 str r3, [r0] RET LMEMCPY_4_PAD /* * 0011: dst is 32-bit aligned, src is 8-bit aligned */ ldr r3, [r1, #-3] /* BE:r3 = xxx0 LE:r3 = 0xxx */ ldr r2, [r1, #1] /* BE:r2 = 123x LE:r2 = x321 */ -#ifdef __ARMEB__ - mov r3, r3, lsl #24 /* r3 = 0... */ - orr r3, r3, r2, lsr #8 /* r3 = 0123 */ -#else mov r3, r3, lsr #24 /* r3 = ...0 */ orr r3, r3, r2, lsl #8 /* r3 = 3210 */ -#endif str r3, [r0] RET LMEMCPY_4_PAD /* * 0100: dst is 8-bit aligned, src is 32-bit aligned */ ldr r2, [r1] -#ifdef __ARMEB__ - strb r2, [r0, #0x03] - mov r3, r2, lsr #8 - mov r1, r2, lsr #24 - strb r1, [r0] -#else strb r2, [r0] mov r3, r2, lsr #8 mov r1, r2, lsr #24 strb r1, [r0, #0x03] -#endif strh r3, [r0, #0x01] RET LMEMCPY_4_PAD /* * 0101: dst is 8-bit aligned, src is 8-bit aligned */ ldrb r2, [r1] ldrh r3, [r1, #0x01] ldrb r1, [r1, #0x03] strb r2, [r0] strh r3, [r0, #0x01] strb r1, [r0, #0x03] RET LMEMCPY_4_PAD /* * 0110: dst is 8-bit aligned, src is 16-bit aligned */ ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ ldrh r3, [r1, #0x02] /* LE:r3 = ..23 LE:r3 = ..32 */ -#ifdef __ARMEB__ mov r1, r2, lsr #8 /* r1 = ...0 */ strb r1, [r0] mov r2, r2, lsl #8 /* r2 = .01. */ orr r2, r2, r3, lsr #8 /* r2 = .012 */ -#else - strb r2, [r0] - mov r2, r2, lsr #8 /* r2 = ...1 */ - orr r2, r2, r3, lsl #8 /* r2 = .321 */ - mov r3, r3, lsr #8 /* r3 = ...3 */ -#endif strh r2, [r0, #0x01] strb r3, [r0, #0x03] RET LMEMCPY_4_PAD /* * 0111: dst is 8-bit aligned, src is 8-bit aligned */ ldrb r2, [r1] ldrh r3, [r1, #0x01] ldrb r1, [r1, #0x03] strb r2, [r0] strh r3, [r0, #0x01] strb r1, [r0, #0x03] RET LMEMCPY_4_PAD /* * 1000: dst is 16-bit aligned, src is 32-bit aligned */ ldr r2, [r1] -#ifdef __ARMEB__ - strh r2, [r0, #0x02] - mov r3, r2, lsr #16 - strh r3, [r0] -#else strh r2, [r0] mov r3, r2, lsr #16 strh r3, [r0, #0x02] -#endif RET LMEMCPY_4_PAD /* * 1001: dst is 16-bit aligned, src is 8-bit aligned */ ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */ mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ strh r1, [r0] -#ifdef __ARMEB__ - mov r2, r2, lsl #8 /* r2 = 012. */ - orr r2, r2, r3, lsr #24 /* r2 = 0123 */ -#else mov r2, r2, lsr #24 /* r2 = ...2 */ orr r2, r2, r3, lsl #8 /* r2 = xx32 */ -#endif strh r2, [r0, #0x02] RET LMEMCPY_4_PAD /* * 1010: dst is 16-bit aligned, src is 16-bit aligned */ ldrh r2, [r1] ldrh r3, [r1, #0x02] strh r2, [r0] strh r3, [r0, #0x02] RET LMEMCPY_4_PAD /* * 1011: dst is 16-bit aligned, src is 8-bit aligned */ ldr r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */ ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */ mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */ strh r1, [r0, #0x02] -#ifdef __ARMEB__ - mov r3, r3, lsr #24 /* r3 = ...1 */ - orr r3, r3, r2, lsl #8 /* r3 = xx01 */ -#else mov r3, r3, lsl #8 /* r3 = 321. */ orr r3, r3, r2, lsr #24 /* r3 = 3210 */ -#endif strh r3, [r0] RET LMEMCPY_4_PAD /* * 1100: dst is 8-bit aligned, src is 32-bit aligned */ ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ -#ifdef __ARMEB__ - strb r2, [r0, #0x03] - mov r3, r2, lsr #8 - mov r1, r2, lsr #24 - strh r3, [r0, #0x01] - strb r1, [r0] -#else strb r2, [r0] mov r3, r2, lsr #8 mov r1, r2, lsr #24 strh r3, [r0, #0x01] strb r1, [r0, #0x03] -#endif RET LMEMCPY_4_PAD /* * 1101: dst is 8-bit aligned, src is 8-bit aligned */ ldrb r2, [r1] ldrh r3, [r1, #0x01] ldrb r1, [r1, #0x03] strb r2, [r0] strh r3, [r0, #0x01] strb r1, [r0, #0x03] RET LMEMCPY_4_PAD /* * 1110: dst is 8-bit aligned, src is 16-bit aligned */ -#ifdef __ARMEB__ - ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ - strb r3, [r0, #0x03] - mov r3, r3, lsr #8 /* r3 = ...2 */ - orr r3, r3, r2, lsl #8 /* r3 = ..12 */ - strh r3, [r0, #0x01] - mov r2, r2, lsr #8 /* r2 = ...0 */ - strb r2, [r0] -#else - ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ strb r2, [r0] mov r2, r2, lsr #8 /* r2 = ...1 */ orr r2, r2, r3, lsl #8 /* r2 = .321 */ strh r2, [r0, #0x01] mov r3, r3, lsr #8 /* r3 = ...3 */ strb r3, [r0, #0x03] -#endif RET LMEMCPY_4_PAD /* * 1111: dst is 8-bit aligned, src is 8-bit aligned */ ldrb r2, [r1] ldrh r3, [r1, #0x01] ldrb r1, [r1, #0x03] strb r2, [r0] strh r3, [r0, #0x01] strb r1, [r0, #0x03] RET LMEMCPY_4_PAD /****************************************************************************** * Special case for 6 byte copies */ #define LMEMCPY_6_LOG2 6 /* 64 bytes */ #define LMEMCPY_6_PAD .align LMEMCPY_6_LOG2 LMEMCPY_6_PAD .Lmemcpy_6: and r2, r1, #0x03 orr r2, r2, r0, lsl #2 ands r2, r2, #0x0f sub r3, pc, #0x14 addne pc, r3, r2, lsl #LMEMCPY_6_LOG2 /* * 0000: dst is 32-bit aligned, src is 32-bit aligned */ ldr r2, [r1] ldrh r3, [r1, #0x04] str r2, [r0] strh r3, [r0, #0x04] RET LMEMCPY_6_PAD /* * 0001: dst is 32-bit aligned, src is 8-bit aligned */ ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ ldr r3, [r1, #0x03] /* BE:r3 = 345x LE:r3 = x543 */ -#ifdef __ARMEB__ - mov r2, r2, lsl #8 /* r2 = 012. */ - orr r2, r2, r3, lsr #24 /* r2 = 0123 */ -#else mov r2, r2, lsr #8 /* r2 = .210 */ orr r2, r2, r3, lsl #24 /* r2 = 3210 */ -#endif mov r3, r3, lsr #8 /* BE:r3 = .345 LE:r3 = .x54 */ str r2, [r0] strh r3, [r0, #0x04] RET LMEMCPY_6_PAD /* * 0010: dst is 32-bit aligned, src is 16-bit aligned */ ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ -#ifdef __ARMEB__ - mov r1, r3, lsr #16 /* r1 = ..23 */ - orr r1, r1, r2, lsl #16 /* r1 = 0123 */ - str r1, [r0] - strh r3, [r0, #0x04] -#else mov r1, r3, lsr #16 /* r1 = ..54 */ orr r2, r2, r3, lsl #16 /* r2 = 3210 */ str r2, [r0] strh r1, [r0, #0x04] -#endif RET LMEMCPY_6_PAD /* * 0011: dst is 32-bit aligned, src is 8-bit aligned */ ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */ ldr r3, [r1, #1] /* BE:r3 = 1234 LE:r3 = 4321 */ ldr r1, [r1, #5] /* BE:r1 = 5xxx LE:r3 = xxx5 */ -#ifdef __ARMEB__ - mov r2, r2, lsl #24 /* r2 = 0... */ - orr r2, r2, r3, lsr #8 /* r2 = 0123 */ - mov r3, r3, lsl #8 /* r3 = 234. */ - orr r1, r3, r1, lsr #24 /* r1 = 2345 */ -#else mov r2, r2, lsr #24 /* r2 = ...0 */ orr r2, r2, r3, lsl #8 /* r2 = 3210 */ mov r1, r1, lsl #8 /* r1 = xx5. */ orr r1, r1, r3, lsr #24 /* r1 = xx54 */ -#endif str r2, [r0] strh r1, [r0, #0x04] RET LMEMCPY_6_PAD /* * 0100: dst is 8-bit aligned, src is 32-bit aligned */ ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */ ldrh r2, [r1, #0x04] /* BE:r2 = ..45 LE:r2 = ..54 */ mov r1, r3, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */ strh r1, [r0, #0x01] -#ifdef __ARMEB__ - mov r1, r3, lsr #24 /* r1 = ...0 */ - strb r1, [r0] - mov r3, r3, lsl #8 /* r3 = 123. */ - orr r3, r3, r2, lsr #8 /* r3 = 1234 */ -#else strb r3, [r0] mov r3, r3, lsr #24 /* r3 = ...3 */ orr r3, r3, r2, lsl #8 /* r3 = .543 */ mov r2, r2, lsr #8 /* r2 = ...5 */ -#endif strh r3, [r0, #0x03] strb r2, [r0, #0x05] RET LMEMCPY_6_PAD /* * 0101: dst is 8-bit aligned, src is 8-bit aligned */ ldrb r2, [r1] ldrh r3, [r1, #0x01] ldrh ip, [r1, #0x03] ldrb r1, [r1, #0x05] strb r2, [r0] strh r3, [r0, #0x01] strh ip, [r0, #0x03] strb r1, [r0, #0x05] RET LMEMCPY_6_PAD /* * 0110: dst is 8-bit aligned, src is 16-bit aligned */ ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */ -#ifdef __ARMEB__ - mov r3, r2, lsr #8 /* r3 = ...0 */ - strb r3, [r0] - strb r1, [r0, #0x05] - mov r3, r1, lsr #8 /* r3 = .234 */ - strh r3, [r0, #0x03] - mov r3, r2, lsl #8 /* r3 = .01. */ - orr r3, r3, r1, lsr #24 /* r3 = .012 */ - strh r3, [r0, #0x01] -#else strb r2, [r0] mov r3, r1, lsr #24 strb r3, [r0, #0x05] mov r3, r1, lsr #8 /* r3 = .543 */ strh r3, [r0, #0x03] mov r3, r2, lsr #8 /* r3 = ...1 */ orr r3, r3, r1, lsl #8 /* r3 = 4321 */ strh r3, [r0, #0x01] -#endif RET LMEMCPY_6_PAD /* * 0111: dst is 8-bit aligned, src is 8-bit aligned */ ldrb r2, [r1] ldrh r3, [r1, #0x01] ldrh ip, [r1, #0x03] ldrb r1, [r1, #0x05] strb r2, [r0] strh r3, [r0, #0x01] strh ip, [r0, #0x03] strb r1, [r0, #0x05] RET LMEMCPY_6_PAD /* * 1000: dst is 16-bit aligned, src is 32-bit aligned */ -#ifdef __ARMEB__ - ldr r2, [r1] /* r2 = 0123 */ - ldrh r3, [r1, #0x04] /* r3 = ..45 */ - mov r1, r2, lsr #16 /* r1 = ..01 */ - orr r3, r3, r2, lsl#16 /* r3 = 2345 */ - strh r1, [r0] - str r3, [r0, #0x02] -#else ldrh r2, [r1, #0x04] /* r2 = ..54 */ ldr r3, [r1] /* r3 = 3210 */ mov r2, r2, lsl #16 /* r2 = 54.. */ orr r2, r2, r3, lsr #16 /* r2 = 5432 */ strh r3, [r0] str r2, [r0, #0x02] -#endif RET LMEMCPY_6_PAD /* * 1001: dst is 16-bit aligned, src is 8-bit aligned */ ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ ldr r2, [r1, #3] /* BE:r2 = 345x LE:r2 = x543 */ mov r1, r3, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ -#ifdef __ARMEB__ - mov r2, r2, lsr #8 /* r2 = .345 */ - orr r2, r2, r3, lsl #24 /* r2 = 2345 */ -#else mov r2, r2, lsl #8 /* r2 = 543. */ orr r2, r2, r3, lsr #24 /* r2 = 5432 */ -#endif strh r1, [r0] str r2, [r0, #0x02] RET LMEMCPY_6_PAD /* * 1010: dst is 16-bit aligned, src is 16-bit aligned */ ldrh r2, [r1] ldr r3, [r1, #0x02] strh r2, [r0] str r3, [r0, #0x02] RET LMEMCPY_6_PAD /* * 1011: dst is 16-bit aligned, src is 8-bit aligned */ ldrb r3, [r1] /* r3 = ...0 */ ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ ldrb r1, [r1, #0x05] /* r1 = ...5 */ -#ifdef __ARMEB__ - mov r3, r3, lsl #8 /* r3 = ..0. */ - orr r3, r3, r2, lsr #24 /* r3 = ..01 */ - orr r1, r1, r2, lsl #8 /* r1 = 2345 */ -#else orr r3, r3, r2, lsl #8 /* r3 = 3210 */ mov r1, r1, lsl #24 /* r1 = 5... */ orr r1, r1, r2, lsr #8 /* r1 = 5432 */ -#endif strh r3, [r0] str r1, [r0, #0x02] RET LMEMCPY_6_PAD /* * 1100: dst is 8-bit aligned, src is 32-bit aligned */ ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ ldrh r1, [r1, #0x04] /* BE:r1 = ..45 LE:r1 = ..54 */ -#ifdef __ARMEB__ - mov r3, r2, lsr #24 /* r3 = ...0 */ - strb r3, [r0] - mov r2, r2, lsl #8 /* r2 = 123. */ - orr r2, r2, r1, lsr #8 /* r2 = 1234 */ -#else strb r2, [r0] mov r2, r2, lsr #8 /* r2 = .321 */ orr r2, r2, r1, lsl #24 /* r2 = 4321 */ mov r1, r1, lsr #8 /* r1 = ...5 */ -#endif str r2, [r0, #0x01] strb r1, [r0, #0x05] RET LMEMCPY_6_PAD /* * 1101: dst is 8-bit aligned, src is 8-bit aligned */ ldrb r2, [r1] ldrh r3, [r1, #0x01] ldrh ip, [r1, #0x03] ldrb r1, [r1, #0x05] strb r2, [r0] strh r3, [r0, #0x01] strh ip, [r0, #0x03] strb r1, [r0, #0x05] RET LMEMCPY_6_PAD /* * 1110: dst is 8-bit aligned, src is 16-bit aligned */ ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */ -#ifdef __ARMEB__ - mov r3, r2, lsr #8 /* r3 = ...0 */ - strb r3, [r0] - mov r2, r2, lsl #24 /* r2 = 1... */ - orr r2, r2, r1, lsr #8 /* r2 = 1234 */ -#else strb r2, [r0] mov r2, r2, lsr #8 /* r2 = ...1 */ orr r2, r2, r1, lsl #8 /* r2 = 4321 */ mov r1, r1, lsr #24 /* r1 = ...5 */ -#endif str r2, [r0, #0x01] strb r1, [r0, #0x05] RET LMEMCPY_6_PAD /* * 1111: dst is 8-bit aligned, src is 8-bit aligned */ ldrb r2, [r1] ldr r3, [r1, #0x01] ldrb r1, [r1, #0x05] strb r2, [r0] str r3, [r0, #0x01] strb r1, [r0, #0x05] RET LMEMCPY_6_PAD /****************************************************************************** * Special case for 8 byte copies */ #define LMEMCPY_8_LOG2 6 /* 64 bytes */ #define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2 LMEMCPY_8_PAD .Lmemcpy_8: and r2, r1, #0x03 orr r2, r2, r0, lsl #2 ands r2, r2, #0x0f sub r3, pc, #0x14 addne pc, r3, r2, lsl #LMEMCPY_8_LOG2 /* * 0000: dst is 32-bit aligned, src is 32-bit aligned */ ldr r2, [r1] ldr r3, [r1, #0x04] str r2, [r0] str r3, [r0, #0x04] RET LMEMCPY_8_PAD /* * 0001: dst is 32-bit aligned, src is 8-bit aligned */ ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ ldr r2, [r1, #0x03] /* BE:r2 = 3456 LE:r2 = 6543 */ ldrb r1, [r1, #0x07] /* r1 = ...7 */ -#ifdef __ARMEB__ - mov r3, r3, lsl #8 /* r3 = 012. */ - orr r3, r3, r2, lsr #24 /* r3 = 0123 */ - orr r2, r1, r2, lsl #8 /* r2 = 4567 */ -#else mov r3, r3, lsr #8 /* r3 = .210 */ orr r3, r3, r2, lsl #24 /* r3 = 3210 */ mov r1, r1, lsl #24 /* r1 = 7... */ orr r2, r1, r2, lsr #8 /* r2 = 7654 */ -#endif str r3, [r0] str r2, [r0, #0x04] RET LMEMCPY_8_PAD /* * 0010: dst is 32-bit aligned, src is 16-bit aligned */ ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ -#ifdef __ARMEB__ - mov r2, r2, lsl #16 /* r2 = 01.. */ - orr r2, r2, r3, lsr #16 /* r2 = 0123 */ - orr r3, r1, r3, lsl #16 /* r3 = 4567 */ -#else orr r2, r2, r3, lsl #16 /* r2 = 3210 */ mov r3, r3, lsr #16 /* r3 = ..54 */ orr r3, r3, r1, lsl #16 /* r3 = 7654 */ -#endif str r2, [r0] str r3, [r0, #0x04] RET LMEMCPY_8_PAD /* * 0011: dst is 32-bit aligned, src is 8-bit aligned */ ldrb r3, [r1] /* r3 = ...0 */ ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 */ -#ifdef __ARMEB__ - mov r3, r3, lsl #24 /* r3 = 0... */ - orr r3, r3, r2, lsr #8 /* r3 = 0123 */ - mov r2, r2, lsl #24 /* r2 = 4... */ - orr r2, r2, r1, lsr #8 /* r2 = 4567 */ -#else orr r3, r3, r2, lsl #8 /* r3 = 3210 */ mov r2, r2, lsr #24 /* r2 = ...4 */ orr r2, r2, r1, lsl #8 /* r2 = 7654 */ -#endif str r3, [r0] str r2, [r0, #0x04] RET LMEMCPY_8_PAD /* * 0100: dst is 8-bit aligned, src is 32-bit aligned */ ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */ ldr r2, [r1, #0x04] /* BE:r2 = 4567 LE:r2 = 7654 */ -#ifdef __ARMEB__ - mov r1, r3, lsr #24 /* r1 = ...0 */ - strb r1, [r0] - mov r1, r3, lsr #8 /* r1 = .012 */ - strb r2, [r0, #0x07] - mov r3, r3, lsl #24 /* r3 = 3... */ - orr r3, r3, r2, lsr #8 /* r3 = 3456 */ -#else strb r3, [r0] mov r1, r2, lsr #24 /* r1 = ...7 */ strb r1, [r0, #0x07] mov r1, r3, lsr #8 /* r1 = .321 */ mov r3, r3, lsr #24 /* r3 = ...3 */ orr r3, r3, r2, lsl #8 /* r3 = 6543 */ -#endif strh r1, [r0, #0x01] str r3, [r0, #0x03] RET LMEMCPY_8_PAD /* * 0101: dst is 8-bit aligned, src is 8-bit aligned */ ldrb r2, [r1] ldrh r3, [r1, #0x01] ldr ip, [r1, #0x03] ldrb r1, [r1, #0x07] strb r2, [r0] strh r3, [r0, #0x01] str ip, [r0, #0x03] strb r1, [r0, #0x07] RET LMEMCPY_8_PAD /* * 0110: dst is 8-bit aligned, src is 16-bit aligned */ ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ -#ifdef __ARMEB__ - mov ip, r2, lsr #8 /* ip = ...0 */ - strb ip, [r0] - mov ip, r2, lsl #8 /* ip = .01. */ - orr ip, ip, r3, lsr #24 /* ip = .012 */ - strb r1, [r0, #0x07] - mov r3, r3, lsl #8 /* r3 = 345. */ - orr r3, r3, r1, lsr #8 /* r3 = 3456 */ -#else strb r2, [r0] /* 0 */ mov ip, r1, lsr #8 /* ip = ...7 */ strb ip, [r0, #0x07] /* 7 */ mov ip, r2, lsr #8 /* ip = ...1 */ orr ip, ip, r3, lsl #8 /* ip = 4321 */ mov r3, r3, lsr #8 /* r3 = .543 */ orr r3, r3, r1, lsl #24 /* r3 = 6543 */ -#endif strh ip, [r0, #0x01] str r3, [r0, #0x03] RET LMEMCPY_8_PAD /* * 0111: dst is 8-bit aligned, src is 8-bit aligned */ ldrb r3, [r1] /* r3 = ...0 */ ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */ ldrh r2, [r1, #0x05] /* BE:r2 = ..56 LE:r2 = ..65 */ ldrb r1, [r1, #0x07] /* r1 = ...7 */ strb r3, [r0] mov r3, ip, lsr #16 /* BE:r3 = ..12 LE:r3 = ..43 */ -#ifdef __ARMEB__ - strh r3, [r0, #0x01] - orr r2, r2, ip, lsl #16 /* r2 = 3456 */ -#else strh ip, [r0, #0x01] orr r2, r3, r2, lsl #16 /* r2 = 6543 */ -#endif str r2, [r0, #0x03] strb r1, [r0, #0x07] RET LMEMCPY_8_PAD /* * 1000: dst is 16-bit aligned, src is 32-bit aligned */ ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ mov r1, r2, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */ -#ifdef __ARMEB__ - strh r1, [r0] - mov r1, r3, lsr #16 /* r1 = ..45 */ - orr r2, r1 ,r2, lsl #16 /* r2 = 2345 */ -#else strh r2, [r0] orr r2, r1, r3, lsl #16 /* r2 = 5432 */ mov r3, r3, lsr #16 /* r3 = ..76 */ -#endif str r2, [r0, #0x02] strh r3, [r0, #0x06] RET LMEMCPY_8_PAD /* * 1001: dst is 16-bit aligned, src is 8-bit aligned */ ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ ldrb ip, [r1, #0x07] /* ip = ...7 */ mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ strh r1, [r0] -#ifdef __ARMEB__ - mov r1, r2, lsl #24 /* r1 = 2... */ - orr r1, r1, r3, lsr #8 /* r1 = 2345 */ - orr r3, ip, r3, lsl #8 /* r3 = 4567 */ -#else mov r1, r2, lsr #24 /* r1 = ...2 */ orr r1, r1, r3, lsl #8 /* r1 = 5432 */ mov r3, r3, lsr #24 /* r3 = ...6 */ orr r3, r3, ip, lsl #8 /* r3 = ..76 */ -#endif str r1, [r0, #0x02] strh r3, [r0, #0x06] RET LMEMCPY_8_PAD /* * 1010: dst is 16-bit aligned, src is 16-bit aligned */ ldrh r2, [r1] ldr ip, [r1, #0x02] ldrh r3, [r1, #0x06] strh r2, [r0] str ip, [r0, #0x02] strh r3, [r0, #0x06] RET LMEMCPY_8_PAD /* * 1011: dst is 16-bit aligned, src is 8-bit aligned */ ldr r3, [r1, #0x05] /* BE:r3 = 567x LE:r3 = x765 */ ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ ldrb ip, [r1] /* ip = ...0 */ mov r1, r3, lsr #8 /* BE:r1 = .567 LE:r1 = .x76 */ strh r1, [r0, #0x06] -#ifdef __ARMEB__ - mov r3, r3, lsr #24 /* r3 = ...5 */ - orr r3, r3, r2, lsl #8 /* r3 = 2345 */ - mov r2, r2, lsr #24 /* r2 = ...1 */ - orr r2, r2, ip, lsl #8 /* r2 = ..01 */ -#else mov r3, r3, lsl #24 /* r3 = 5... */ orr r3, r3, r2, lsr #8 /* r3 = 5432 */ orr r2, ip, r2, lsl #8 /* r2 = 3210 */ -#endif str r3, [r0, #0x02] strh r2, [r0] RET LMEMCPY_8_PAD /* * 1100: dst is 8-bit aligned, src is 32-bit aligned */ ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ mov r1, r3, lsr #8 /* BE:r1 = .456 LE:r1 = .765 */ strh r1, [r0, #0x05] -#ifdef __ARMEB__ - strb r3, [r0, #0x07] - mov r1, r2, lsr #24 /* r1 = ...0 */ - strb r1, [r0] - mov r2, r2, lsl #8 /* r2 = 123. */ - orr r2, r2, r3, lsr #24 /* r2 = 1234 */ - str r2, [r0, #0x01] -#else strb r2, [r0] mov r1, r3, lsr #24 /* r1 = ...7 */ strb r1, [r0, #0x07] mov r2, r2, lsr #8 /* r2 = .321 */ orr r2, r2, r3, lsl #24 /* r2 = 4321 */ str r2, [r0, #0x01] -#endif RET LMEMCPY_8_PAD /* * 1101: dst is 8-bit aligned, src is 8-bit aligned */ ldrb r3, [r1] /* r3 = ...0 */ ldrh r2, [r1, #0x01] /* BE:r2 = ..12 LE:r2 = ..21 */ ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */ ldrb r1, [r1, #0x07] /* r1 = ...7 */ strb r3, [r0] mov r3, ip, lsr #16 /* BE:r3 = ..34 LE:r3 = ..65 */ -#ifdef __ARMEB__ - strh ip, [r0, #0x05] - orr r2, r3, r2, lsl #16 /* r2 = 1234 */ -#else strh r3, [r0, #0x05] orr r2, r2, ip, lsl #16 /* r2 = 4321 */ -#endif str r2, [r0, #0x01] strb r1, [r0, #0x07] RET LMEMCPY_8_PAD /* * 1110: dst is 8-bit aligned, src is 16-bit aligned */ ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ -#ifdef __ARMEB__ - mov ip, r2, lsr #8 /* ip = ...0 */ - strb ip, [r0] - mov ip, r2, lsl #24 /* ip = 1... */ - orr ip, ip, r3, lsr #8 /* ip = 1234 */ - strb r1, [r0, #0x07] - mov r1, r1, lsr #8 /* r1 = ...6 */ - orr r1, r1, r3, lsl #8 /* r1 = 3456 */ -#else strb r2, [r0] mov ip, r2, lsr #8 /* ip = ...1 */ orr ip, ip, r3, lsl #8 /* ip = 4321 */ mov r2, r1, lsr #8 /* r2 = ...7 */ strb r2, [r0, #0x07] mov r1, r1, lsl #8 /* r1 = .76. */ orr r1, r1, r3, lsr #24 /* r1 = .765 */ -#endif str ip, [r0, #0x01] strh r1, [r0, #0x05] RET LMEMCPY_8_PAD /* * 1111: dst is 8-bit aligned, src is 8-bit aligned */ ldrb r2, [r1] ldr ip, [r1, #0x01] ldrh r3, [r1, #0x05] ldrb r1, [r1, #0x07] strb r2, [r0] str ip, [r0, #0x01] strh r3, [r0, #0x05] strb r1, [r0, #0x07] RET LMEMCPY_8_PAD /****************************************************************************** * Special case for 12 byte copies */ #define LMEMCPY_C_LOG2 7 /* 128 bytes */ #define LMEMCPY_C_PAD .align LMEMCPY_C_LOG2 LMEMCPY_C_PAD .Lmemcpy_c: and r2, r1, #0x03 orr r2, r2, r0, lsl #2 ands r2, r2, #0x0f sub r3, pc, #0x14 addne pc, r3, r2, lsl #LMEMCPY_C_LOG2 /* * 0000: dst is 32-bit aligned, src is 32-bit aligned */ ldr r2, [r1] ldr r3, [r1, #0x04] ldr r1, [r1, #0x08] str r2, [r0] str r3, [r0, #0x04] str r1, [r0, #0x08] RET LMEMCPY_C_PAD /* * 0001: dst is 32-bit aligned, src is 8-bit aligned */ ldrb r2, [r1, #0xb] /* r2 = ...B */ ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */ ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */ -#ifdef __ARMEB__ - orr r2, r2, ip, lsl #8 /* r2 = 89AB */ - str r2, [r0, #0x08] - mov r2, ip, lsr #24 /* r2 = ...7 */ - orr r2, r2, r3, lsl #8 /* r2 = 4567 */ - mov r1, r1, lsl #8 /* r1 = 012. */ - orr r1, r1, r3, lsr #24 /* r1 = 0123 */ -#else mov r2, r2, lsl #24 /* r2 = B... */ orr r2, r2, ip, lsr #8 /* r2 = BA98 */ str r2, [r0, #0x08] mov r2, ip, lsl #24 /* r2 = 7... */ orr r2, r2, r3, lsr #8 /* r2 = 7654 */ mov r1, r1, lsr #8 /* r1 = .210 */ orr r1, r1, r3, lsl #24 /* r1 = 3210 */ -#endif str r2, [r0, #0x04] str r1, [r0] RET LMEMCPY_C_PAD /* * 0010: dst is 32-bit aligned, src is 16-bit aligned */ ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */ ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */ -#ifdef __ARMEB__ - mov r2, r2, lsl #16 /* r2 = 01.. */ - orr r2, r2, r3, lsr #16 /* r2 = 0123 */ - str r2, [r0] - mov r3, r3, lsl #16 /* r3 = 45.. */ - orr r3, r3, ip, lsr #16 /* r3 = 4567 */ - orr r1, r1, ip, lsl #16 /* r1 = 89AB */ -#else orr r2, r2, r3, lsl #16 /* r2 = 3210 */ str r2, [r0] mov r3, r3, lsr #16 /* r3 = ..54 */ orr r3, r3, ip, lsl #16 /* r3 = 7654 */ mov r1, r1, lsl #16 /* r1 = BA.. */ orr r1, r1, ip, lsr #16 /* r1 = BA98 */ -#endif str r3, [r0, #0x04] str r1, [r0, #0x08] RET LMEMCPY_C_PAD /* * 0011: dst is 32-bit aligned, src is 8-bit aligned */ ldrb r2, [r1] /* r2 = ...0 */ ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */ ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */ ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */ -#ifdef __ARMEB__ - mov r2, r2, lsl #24 /* r2 = 0... */ - orr r2, r2, r3, lsr #8 /* r2 = 0123 */ - str r2, [r0] - mov r3, r3, lsl #24 /* r3 = 4... */ - orr r3, r3, ip, lsr #8 /* r3 = 4567 */ - mov r1, r1, lsr #8 /* r1 = .9AB */ - orr r1, r1, ip, lsl #24 /* r1 = 89AB */ -#else orr r2, r2, r3, lsl #8 /* r2 = 3210 */ str r2, [r0] mov r3, r3, lsr #24 /* r3 = ...4 */ orr r3, r3, ip, lsl #8 /* r3 = 7654 */ mov r1, r1, lsl #8 /* r1 = BA9. */ orr r1, r1, ip, lsr #24 /* r1 = BA98 */ -#endif str r3, [r0, #0x04] str r1, [r0, #0x08] RET LMEMCPY_C_PAD /* * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned */ ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ ldr ip, [r1, #0x08] /* BE:ip = 89AB LE:ip = BA98 */ mov r1, r2, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */ strh r1, [r0, #0x01] -#ifdef __ARMEB__ - mov r1, r2, lsr #24 /* r1 = ...0 */ - strb r1, [r0] - mov r1, r2, lsl #24 /* r1 = 3... */ - orr r2, r1, r3, lsr #8 /* r1 = 3456 */ - mov r1, r3, lsl #24 /* r1 = 7... */ - orr r1, r1, ip, lsr #8 /* r1 = 789A */ -#else strb r2, [r0] mov r1, r2, lsr #24 /* r1 = ...3 */ orr r2, r1, r3, lsl #8 /* r1 = 6543 */ mov r1, r3, lsr #24 /* r1 = ...7 */ orr r1, r1, ip, lsl #8 /* r1 = A987 */ mov ip, ip, lsr #24 /* ip = ...B */ -#endif str r2, [r0, #0x03] str r1, [r0, #0x07] strb ip, [r0, #0x0b] RET LMEMCPY_C_PAD /* * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1) */ ldrb r2, [r1] ldrh r3, [r1, #0x01] ldr ip, [r1, #0x03] strb r2, [r0] ldr r2, [r1, #0x07] ldrb r1, [r1, #0x0b] strh r3, [r0, #0x01] str ip, [r0, #0x03] str r2, [r0, #0x07] strb r1, [r0, #0x0b] RET LMEMCPY_C_PAD /* * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned */ ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */ ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */ -#ifdef __ARMEB__ - mov r2, r2, ror #8 /* r2 = 1..0 */ strb r2, [r0] - mov r2, r2, lsr #16 /* r2 = ..1. */ - orr r2, r2, r3, lsr #24 /* r2 = ..12 */ - strh r2, [r0, #0x01] - mov r2, r3, lsl #8 /* r2 = 345. */ - orr r3, r2, ip, lsr #24 /* r3 = 3456 */ - mov r2, ip, lsl #8 /* r2 = 789. */ - orr r2, r2, r1, lsr #8 /* r2 = 789A */ -#else - strb r2, [r0] mov r2, r2, lsr #8 /* r2 = ...1 */ orr r2, r2, r3, lsl #8 /* r2 = 4321 */ strh r2, [r0, #0x01] mov r2, r3, lsr #8 /* r2 = .543 */ orr r3, r2, ip, lsl #24 /* r3 = 6543 */ mov r2, ip, lsr #8 /* r2 = .987 */ orr r2, r2, r1, lsl #24 /* r2 = A987 */ mov r1, r1, lsr #8 /* r1 = ...B */ -#endif str r3, [r0, #0x03] str r2, [r0, #0x07] strb r1, [r0, #0x0b] RET LMEMCPY_C_PAD /* * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3) */ ldrb r2, [r1] ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */ ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */ ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */ strb r2, [r0] -#ifdef __ARMEB__ - mov r2, r3, lsr #16 /* r2 = ..12 */ - strh r2, [r0, #0x01] - mov r3, r3, lsl #16 /* r3 = 34.. */ - orr r3, r3, ip, lsr #16 /* r3 = 3456 */ - mov ip, ip, lsl #16 /* ip = 78.. */ - orr ip, ip, r1, lsr #16 /* ip = 789A */ - mov r1, r1, lsr #8 /* r1 = .9AB */ -#else strh r3, [r0, #0x01] mov r3, r3, lsr #16 /* r3 = ..43 */ orr r3, r3, ip, lsl #16 /* r3 = 6543 */ mov ip, ip, lsr #16 /* ip = ..87 */ orr ip, ip, r1, lsl #16 /* ip = A987 */ mov r1, r1, lsr #16 /* r1 = ..xB */ -#endif str r3, [r0, #0x03] str ip, [r0, #0x07] strb r1, [r0, #0x0b] RET LMEMCPY_C_PAD /* * 1000: dst is 16-bit aligned, src is 32-bit aligned */ ldr ip, [r1] /* BE:ip = 0123 LE:ip = 3210 */ ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ ldr r2, [r1, #0x08] /* BE:r2 = 89AB LE:r2 = BA98 */ mov r1, ip, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */ -#ifdef __ARMEB__ - strh r1, [r0] - mov r1, ip, lsl #16 /* r1 = 23.. */ - orr r1, r1, r3, lsr #16 /* r1 = 2345 */ - mov r3, r3, lsl #16 /* r3 = 67.. */ - orr r3, r3, r2, lsr #16 /* r3 = 6789 */ -#else strh ip, [r0] orr r1, r1, r3, lsl #16 /* r1 = 5432 */ mov r3, r3, lsr #16 /* r3 = ..76 */ orr r3, r3, r2, lsl #16 /* r3 = 9876 */ mov r2, r2, lsr #16 /* r2 = ..BA */ -#endif str r1, [r0, #0x02] str r3, [r0, #0x06] strh r2, [r0, #0x0a] RET LMEMCPY_C_PAD /* * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1) */ ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ mov ip, r2, lsr #8 /* BE:ip = .x01 LE:ip = .210 */ strh ip, [r0] ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */ ldrb r1, [r1, #0x0b] /* r1 = ...B */ -#ifdef __ARMEB__ - mov r2, r2, lsl #24 /* r2 = 2... */ - orr r2, r2, r3, lsr #8 /* r2 = 2345 */ - mov r3, r3, lsl #24 /* r3 = 6... */ - orr r3, r3, ip, lsr #8 /* r3 = 6789 */ - orr r1, r1, ip, lsl #8 /* r1 = 89AB */ -#else mov r2, r2, lsr #24 /* r2 = ...2 */ orr r2, r2, r3, lsl #8 /* r2 = 5432 */ mov r3, r3, lsr #24 /* r3 = ...6 */ orr r3, r3, ip, lsl #8 /* r3 = 9876 */ mov r1, r1, lsl #8 /* r1 = ..B. */ orr r1, r1, ip, lsr #24 /* r1 = ..BA */ -#endif str r2, [r0, #0x02] str r3, [r0, #0x06] strh r1, [r0, #0x0a] RET LMEMCPY_C_PAD /* * 1010: dst is 16-bit aligned, src is 16-bit aligned */ ldrh r2, [r1] ldr r3, [r1, #0x02] ldr ip, [r1, #0x06] ldrh r1, [r1, #0x0a] strh r2, [r0] str r3, [r0, #0x02] str ip, [r0, #0x06] strh r1, [r0, #0x0a] RET LMEMCPY_C_PAD /* * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3) */ ldr r2, [r1, #0x09] /* BE:r2 = 9ABx LE:r2 = xBA9 */ ldr r3, [r1, #0x05] /* BE:r3 = 5678 LE:r3 = 8765 */ mov ip, r2, lsr #8 /* BE:ip = .9AB LE:ip = .xBA */ strh ip, [r0, #0x0a] ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */ ldrb r1, [r1] /* r1 = ...0 */ -#ifdef __ARMEB__ - mov r2, r2, lsr #24 /* r2 = ...9 */ - orr r2, r2, r3, lsl #8 /* r2 = 6789 */ - mov r3, r3, lsr #24 /* r3 = ...5 */ - orr r3, r3, ip, lsl #8 /* r3 = 2345 */ - mov r1, r1, lsl #8 /* r1 = ..0. */ - orr r1, r1, ip, lsr #24 /* r1 = ..01 */ -#else mov r2, r2, lsl #24 /* r2 = 9... */ orr r2, r2, r3, lsr #8 /* r2 = 9876 */ mov r3, r3, lsl #24 /* r3 = 5... */ orr r3, r3, ip, lsr #8 /* r3 = 5432 */ orr r1, r1, ip, lsl #8 /* r1 = 3210 */ -#endif str r2, [r0, #0x06] str r3, [r0, #0x02] strh r1, [r0] RET LMEMCPY_C_PAD /* * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned */ ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ ldr ip, [r1, #0x04] /* BE:ip = 4567 LE:ip = 7654 */ ldr r1, [r1, #0x08] /* BE:r1 = 89AB LE:r1 = BA98 */ -#ifdef __ARMEB__ - mov r3, r2, lsr #24 /* r3 = ...0 */ - strb r3, [r0] - mov r2, r2, lsl #8 /* r2 = 123. */ - orr r2, r2, ip, lsr #24 /* r2 = 1234 */ - str r2, [r0, #0x01] - mov r2, ip, lsl #8 /* r2 = 567. */ - orr r2, r2, r1, lsr #24 /* r2 = 5678 */ - str r2, [r0, #0x05] - mov r2, r1, lsr #8 /* r2 = ..9A */ - strh r2, [r0, #0x09] - strb r1, [r0, #0x0b] -#else strb r2, [r0] mov r3, r2, lsr #8 /* r3 = .321 */ orr r3, r3, ip, lsl #24 /* r3 = 4321 */ str r3, [r0, #0x01] mov r3, ip, lsr #8 /* r3 = .765 */ orr r3, r3, r1, lsl #24 /* r3 = 8765 */ str r3, [r0, #0x05] mov r1, r1, lsr #8 /* r1 = .BA9 */ strh r1, [r0, #0x09] mov r1, r1, lsr #16 /* r1 = ...B */ strb r1, [r0, #0x0b] -#endif RET LMEMCPY_C_PAD /* * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1) */ ldrb r2, [r1, #0x0b] /* r2 = ...B */ ldr r3, [r1, #0x07] /* BE:r3 = 789A LE:r3 = A987 */ ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */ ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */ strb r2, [r0, #0x0b] -#ifdef __ARMEB__ - strh r3, [r0, #0x09] - mov r3, r3, lsr #16 /* r3 = ..78 */ - orr r3, r3, ip, lsl #16 /* r3 = 5678 */ - mov ip, ip, lsr #16 /* ip = ..34 */ - orr ip, ip, r1, lsl #16 /* ip = 1234 */ - mov r1, r1, lsr #16 /* r1 = ..x0 */ -#else mov r2, r3, lsr #16 /* r2 = ..A9 */ strh r2, [r0, #0x09] mov r3, r3, lsl #16 /* r3 = 87.. */ orr r3, r3, ip, lsr #16 /* r3 = 8765 */ mov ip, ip, lsl #16 /* ip = 43.. */ orr ip, ip, r1, lsr #16 /* ip = 4321 */ mov r1, r1, lsr #8 /* r1 = .210 */ -#endif str r3, [r0, #0x05] str ip, [r0, #0x01] strb r1, [r0] RET LMEMCPY_C_PAD /* * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned */ -#ifdef __ARMEB__ - ldrh r2, [r1, #0x0a] /* r2 = ..AB */ - ldr ip, [r1, #0x06] /* ip = 6789 */ - ldr r3, [r1, #0x02] /* r3 = 2345 */ - ldrh r1, [r1] /* r1 = ..01 */ - strb r2, [r0, #0x0b] - mov r2, r2, lsr #8 /* r2 = ...A */ - orr r2, r2, ip, lsl #8 /* r2 = 789A */ - mov ip, ip, lsr #8 /* ip = .678 */ - orr ip, ip, r3, lsl #24 /* ip = 5678 */ - mov r3, r3, lsr #8 /* r3 = .234 */ - orr r3, r3, r1, lsl #24 /* r3 = 1234 */ - mov r1, r1, lsr #8 /* r1 = ...0 */ - strb r1, [r0] - str r3, [r0, #0x01] - str ip, [r0, #0x05] - strh r2, [r0, #0x09] -#else ldrh r2, [r1] /* r2 = ..10 */ ldr r3, [r1, #0x02] /* r3 = 5432 */ ldr ip, [r1, #0x06] /* ip = 9876 */ ldrh r1, [r1, #0x0a] /* r1 = ..BA */ strb r2, [r0] mov r2, r2, lsr #8 /* r2 = ...1 */ orr r2, r2, r3, lsl #8 /* r2 = 4321 */ mov r3, r3, lsr #24 /* r3 = ...5 */ orr r3, r3, ip, lsl #8 /* r3 = 8765 */ mov ip, ip, lsr #24 /* ip = ...9 */ orr ip, ip, r1, lsl #8 /* ip = .BA9 */ mov r1, r1, lsr #8 /* r1 = ...B */ str r2, [r0, #0x01] str r3, [r0, #0x05] strh ip, [r0, #0x09] strb r1, [r0, #0x0b] -#endif RET LMEMCPY_C_PAD /* * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3) */ ldrb r2, [r1] ldr r3, [r1, #0x01] ldr ip, [r1, #0x05] strb r2, [r0] ldrh r2, [r1, #0x09] ldrb r1, [r1, #0x0b] str r3, [r0, #0x01] str ip, [r0, #0x05] strh r2, [r0, #0x09] strb r1, [r0, #0x0b] RET END(memcpy) #endif /* _ARM_ARCH_5E */ #ifdef GPROF ENTRY(user) nop END(user) ENTRY(btrap) nop END(btrap) ENTRY(etrap) nop END(etrap) ENTRY(bintr) nop END(bintr) ENTRY(eintr) nop END(eintr) #endif Index: head/sys/arm/arm/vm_machdep.c =================================================================== --- head/sys/arm/arm/vm_machdep.c (revision 368152) +++ head/sys/arm/arm/vm_machdep.c (revision 368153) @@ -1,346 +1,320 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1982, 1986 The Regents of the University of California. * Copyright (c) 1989, 1990 William Jolitz * Copyright (c) 1994 John Dyson * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department, and William Jolitz. * * Redistribution and use in source and binary :forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * struct switchframe and trapframe must both be a multiple of 8 * for correct stack alignment. */ _Static_assert((sizeof(struct switchframe) % 8) == 0, "Bad alignment"); _Static_assert((sizeof(struct trapframe) % 8) == 0, "Bad alignment"); uint32_t initial_fpscr = VFPSCR_DN | VFPSCR_FZ; /* * Finish a fork operation, with process p2 nearly set up. * Copy and update the pcb, set up the stack so that the child * ready to run and return to user mode. */ void cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags) { struct pcb *pcb2; struct trapframe *tf; struct mdproc *mdp2; if ((flags & RFPROC) == 0) return; /* Point the pcb to the top of the stack */ pcb2 = (struct pcb *) (td2->td_kstack + td2->td_kstack_pages * PAGE_SIZE) - 1; #ifdef VFP /* Store actual state of VFP */ if (curthread == td1) { critical_enter(); vfp_store(&td1->td_pcb->pcb_vfpstate, false); critical_exit(); } #endif td2->td_pcb = pcb2; /* Clone td1's pcb */ bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); /* Point to mdproc and then copy over td1's contents */ mdp2 = &p2->p_md; bcopy(&td1->td_proc->p_md, mdp2, sizeof(*mdp2)); /* Point the frame to the stack in front of pcb and copy td1's frame */ td2->td_frame = (struct trapframe *)pcb2 - 1; *td2->td_frame = *td1->td_frame; /* * Create a new fresh stack for the new process. * Copy the trap frame for the return to user mode as if from a * syscall. This copies most of the user mode register values. */ pmap_set_pcb_pagedir(vmspace_pmap(p2->p_vmspace), pcb2); pcb2->pcb_regs.sf_r4 = (register_t)fork_return; pcb2->pcb_regs.sf_r5 = (register_t)td2; pcb2->pcb_regs.sf_lr = (register_t)fork_trampoline; pcb2->pcb_regs.sf_sp = STACKALIGN(td2->td_frame); pcb2->pcb_regs.sf_tpidrurw = (register_t)get_tls(); pcb2->pcb_vfpcpu = -1; pcb2->pcb_vfpstate.fpscr = initial_fpscr; tf = td2->td_frame; tf->tf_spsr &= ~PSR_C; tf->tf_r0 = 0; tf->tf_r1 = 0; /* Setup to release spin count in fork_exit(). */ td2->td_md.md_spinlock_count = 1; td2->td_md.md_saved_cspr = PSR_SVC32_MODE; } void cpu_thread_swapin(struct thread *td) { } void cpu_thread_swapout(struct thread *td) { } void cpu_set_syscall_retval(struct thread *td, int error) { struct trapframe *frame; - int fixup; -#ifdef __ARMEB__ - u_int call; -#endif frame = td->td_frame; - fixup = 0; - -#ifdef __ARMEB__ - /* - * __syscall returns an off_t while most other syscalls return an - * int. As an off_t is 64-bits and an int is 32-bits we need to - * place the returned data into r1. As the lseek and freebsd6_lseek - * syscalls also return an off_t they do not need this fixup. - */ - call = frame->tf_r7; - if (call == SYS___syscall) { - register_t *ap = &frame->tf_r0; - register_t code = ap[_QUAD_LOWWORD]; - fixup = (code != SYS_lseek); - } -#endif - switch (error) { case 0: - if (fixup) { - frame->tf_r0 = 0; - frame->tf_r1 = td->td_retval[0]; - } else { - frame->tf_r0 = td->td_retval[0]; - frame->tf_r1 = td->td_retval[1]; - } + frame->tf_r0 = td->td_retval[0]; + frame->tf_r1 = td->td_retval[1]; frame->tf_spsr &= ~PSR_C; /* carry bit */ break; case ERESTART: /* * Reconstruct the pc to point at the swi. */ #if __ARM_ARCH >= 7 if ((frame->tf_spsr & PSR_T) != 0) frame->tf_pc -= THUMB_INSN_SIZE; else #endif frame->tf_pc -= INSN_SIZE; break; case EJUSTRETURN: /* nothing to do */ break; default: frame->tf_r0 = error; frame->tf_spsr |= PSR_C; /* carry bit */ break; } } /* * Initialize machine state, mostly pcb and trap frame for a new * thread, about to return to userspace. Put enough state in the new * thread's PCB to get it to go back to the fork_return(), which * finalizes the thread state and handles peculiarities of the first * return to userspace for the new thread. */ void cpu_copy_thread(struct thread *td, struct thread *td0) { bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe)); bcopy(td0->td_pcb, td->td_pcb, sizeof(struct pcb)); td->td_pcb->pcb_regs.sf_r4 = (register_t)fork_return; td->td_pcb->pcb_regs.sf_r5 = (register_t)td; td->td_pcb->pcb_regs.sf_lr = (register_t)fork_trampoline; td->td_pcb->pcb_regs.sf_sp = STACKALIGN(td->td_frame); td->td_frame->tf_spsr &= ~PSR_C; td->td_frame->tf_r0 = 0; /* Setup to release spin count in fork_exit(). */ td->td_md.md_spinlock_count = 1; td->td_md.md_saved_cspr = PSR_SVC32_MODE; } /* * Set that machine state for performing an upcall that starts * the entry function with the given argument. */ void cpu_set_upcall(struct thread *td, void (*entry)(void *), void *arg, stack_t *stack) { struct trapframe *tf = td->td_frame; tf->tf_usr_sp = STACKALIGN((int)stack->ss_sp + stack->ss_size); tf->tf_pc = (int)entry; tf->tf_r0 = (int)arg; tf->tf_spsr = PSR_USR32_MODE; } int cpu_set_user_tls(struct thread *td, void *tls_base) { td->td_pcb->pcb_regs.sf_tpidrurw = (register_t)tls_base; if (td == curthread) set_tls(tls_base); return (0); } void cpu_thread_exit(struct thread *td) { } void cpu_thread_alloc(struct thread *td) { td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_pages * PAGE_SIZE) - 1; /* * Ensure td_frame is aligned to an 8 byte boundary as it will be * placed into the stack pointer which must be 8 byte aligned in * the ARM EABI. */ td->td_frame = (struct trapframe *)((caddr_t)td->td_pcb) - 1; } void cpu_thread_free(struct thread *td) { } void cpu_thread_clean(struct thread *td) { } /* * Intercept the return address from a freshly forked process that has NOT * been scheduled yet. * * This is needed to make kernel threads stay in kernel mode. */ void cpu_fork_kthread_handler(struct thread *td, void (*func)(void *), void *arg) { td->td_pcb->pcb_regs.sf_r4 = (register_t)func; /* function */ td->td_pcb->pcb_regs.sf_r5 = (register_t)arg; /* first arg */ } /* * Software interrupt handler for queued VM system processing. */ void swi_vm(void *dummy) { if (busdma_swi_pending) busdma_swi(); } void cpu_exit(struct thread *td) { } bool cpu_exec_vmspace_reuse(struct proc *p __unused, vm_map_t map __unused) { return (true); } int cpu_procctl(struct thread *td __unused, int idtype __unused, id_t id __unused, int com __unused, void *data __unused) { return (EINVAL); }