Index: stable/10/sys/arm/arm/cpufunc_asm_arm10.S =================================================================== --- stable/10/sys/arm/arm/cpufunc_asm_arm10.S (revision 269795) +++ stable/10/sys/arm/arm/cpufunc_asm_arm10.S (revision 269796) @@ -1,276 +1,276 @@ /* $NetBSD: cpufunc_asm_arm10.S,v 1.1 2003/09/06 09:12:29 rearnsha Exp $ */ /*- * Copyright (c) 2002 ARM Limited * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the company may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * ARM10 assembly functions for CPU / MMU / TLB specific operations * */ #include __FBSDID("$FreeBSD$"); /* * Functions to set the MMU Translation Table Base register * * We need to clean and flush the cache as it uses virtual * addresses that are about to change. */ ENTRY(arm10_setttb) stmfd sp!, {r0, lr} bl _C_LABEL(arm10_idcache_wbinv_all) ldmfd sp!, {r0, lr} mcr p15, 0, r0, c2, c0, 0 /* load new TTB */ mcr p15, 0, r0, c8, c7, 0 /* invalidate I+D TLBs */ bx lr END(arm10_setttb) /* * TLB functions */ ENTRY(arm10_tlb_flushID_SE) mcr p15, 0, r0, c8, c6, 1 /* flush D tlb single entry */ mcr p15, 0, r0, c8, c5, 1 /* flush I tlb single entry */ bx lr END(arm10_tlb_flushID_SE) ENTRY(arm10_tlb_flushI_SE) mcr p15, 0, r0, c8, c5, 1 /* flush I tlb single entry */ bx lr END(arm10_tlb_flushI_SE) /* * Cache operations. For the entire cache we use the set/index * operations. */ s_max .req r0 i_max .req r1 s_inc .req r2 i_inc .req r3 ENTRY_NP(arm10_icache_sync_range) ldr ip, .Larm10_line_size cmp r1, #0x4000 bcs .Larm10_icache_sync_all ldr ip, [ip] sub r3, ip, #1 and r2, r0, r3 add r1, r1, r2 bic r0, r0, r3 .Larm10_sync_next: mcr p15, 0, r0, c7, c5, 1 /* Invalidate I cache SE with VA */ mcr p15, 0, r0, c7, c10, 1 /* Clean D cache SE with VA */ add r0, r0, ip subs r1, r1, ip bhi .Larm10_sync_next mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ bx lr END(arm10_icache_sync_range) ENTRY_NP(arm10_icache_sync_all) .Larm10_icache_sync_all: /* * We assume that the code here can never be out of sync with the * dcache, so that we can safely flush the Icache and fall through * into the Dcache cleaning code. */ mcr p15, 0, r0, c7, c5, 0 /* Flush I cache */ /* Fall through to clean Dcache. 
*/ .Larm10_dcache_wb: ldr ip, .Larm10_cache_data ldmia ip, {s_max, i_max, s_inc, i_inc} .Lnext_set: orr ip, s_max, i_max .Lnext_index: mcr p15, 0, ip, c7, c10, 2 /* Clean D cache SE with Set/Index */ subs ip, ip, i_inc bhs .Lnext_index /* Next index */ subs s_max, s_max, s_inc bhs .Lnext_set /* Next set */ mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ bx lr END(arm10_icache_sync_all) .Larm10_line_size: .word _C_LABEL(arm_pdcache_line_size) ENTRY(arm10_dcache_wb_range) ldr ip, .Larm10_line_size cmp r1, #0x4000 bcs .Larm10_dcache_wb ldr ip, [ip] sub r3, ip, #1 and r2, r0, r3 add r1, r1, r2 bic r0, r0, r3 .Larm10_wb_next: mcr p15, 0, r0, c7, c10, 1 /* Clean D cache SE with VA */ add r0, r0, ip subs r1, r1, ip bhi .Larm10_wb_next mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ bx lr END(arm10_dcache_wb_range) ENTRY(arm10_dcache_wbinv_range) ldr ip, .Larm10_line_size cmp r1, #0x4000 bcs .Larm10_dcache_wbinv_all ldr ip, [ip] sub r3, ip, #1 and r2, r0, r3 add r1, r1, r2 bic r0, r0, r3 .Larm10_wbinv_next: mcr p15, 0, r0, c7, c14, 1 /* Purge D cache SE with VA */ add r0, r0, ip subs r1, r1, ip bhi .Larm10_wbinv_next mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ bx lr END(arm10_dcache_wbinv_range) /* * Note, we must not invalidate everything. If the range is too big we * must use wb-inv of the entire cache. 
*/ ENTRY(arm10_dcache_inv_range) ldr ip, .Larm10_line_size cmp r1, #0x4000 bcs .Larm10_dcache_wbinv_all ldr ip, [ip] sub r3, ip, #1 and r2, r0, r3 add r1, r1, r2 bic r0, r0, r3 .Larm10_inv_next: mcr p15, 0, r0, c7, c6, 1 /* Invalidate D cache SE with VA */ add r0, r0, ip subs r1, r1, ip bhi .Larm10_inv_next mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ bx lr END(arm10_dcache_inv_range) ENTRY(arm10_idcache_wbinv_range) ldr ip, .Larm10_line_size cmp r1, #0x4000 bcs .Larm10_idcache_wbinv_all ldr ip, [ip] sub r3, ip, #1 and r2, r0, r3 add r1, r1, r2 bic r0, r0, r3 .Larm10_id_wbinv_next: mcr p15, 0, r0, c7, c5, 1 /* Invalidate I cache SE with VA */ mcr p15, 0, r0, c7, c14, 1 /* Purge D cache SE with VA */ add r0, r0, ip subs r1, r1, ip bhi .Larm10_id_wbinv_next mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ bx lr END(arm10_idcache_wbinv_range) ENTRY_NP(arm10_idcache_wbinv_all) .Larm10_idcache_wbinv_all: /* * We assume that the code here can never be out of sync with the * dcache, so that we can safely flush the Icache and fall through * into the Dcache purging code. */ mcr p15, 0, r0, c7, c5, 0 /* Flush I cache */ /* Fall through to purge Dcache. */ -ENTRY(arm10_dcache_wbinv_all) +EENTRY(arm10_dcache_wbinv_all) .Larm10_dcache_wbinv_all: ldr ip, .Larm10_cache_data ldmia ip, {s_max, i_max, s_inc, i_inc} .Lnext_set_inv: orr ip, s_max, i_max .Lnext_index_inv: mcr p15, 0, ip, c7, c14, 2 /* Purge D cache SE with Set/Index */ subs ip, ip, i_inc bhs .Lnext_index_inv /* Next index */ subs s_max, s_max, s_inc bhs .Lnext_set_inv /* Next set */ mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ bx lr +EEND(arm10_dcache_wbinv_all) END(arm10_idcache_wbinv_all) -END(arm10_dcache_wbinv_all) .Larm10_cache_data: .word _C_LABEL(arm10_dcache_sets_max) /* * Context switch. * * These is the CPU-specific parts of the context switcher cpu_switch() * These functions actually perform the TTB reload. 
* * NOTE: Special calling convention * r1, r4-r13 must be preserved */ ENTRY(arm10_context_switch) /* * We can assume that the caches will only contain kernel addresses * at this point. So no need to flush them again. */ mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ mcr p15, 0, r0, c2, c0, 0 /* set the new TTB */ mcr p15, 0, r0, c8, c7, 0 /* and flush the I+D tlbs */ /* Paranoia -- make sure the pipeline is empty. */ nop nop nop bx lr END(arm10_context_switch) .bss /* XXX The following macros should probably be moved to asm.h */ #define _DATA_OBJECT(x) .globl x; .type x,_ASM_TYPE_OBJECT; x: #define C_OBJECT(x) _DATA_OBJECT(_C_LABEL(x)) /* * Parameters for the cache cleaning code. Note that the order of these * four variables is assumed in the code above. Hence the reason for * declaring them in the assembler file. */ .align 0 C_OBJECT(arm10_dcache_sets_max) .space 4 C_OBJECT(arm10_dcache_index_max) .space 4 C_OBJECT(arm10_dcache_sets_inc) .space 4 C_OBJECT(arm10_dcache_index_inc) .space 4 Index: stable/10/sys/arm/arm/cpufunc_asm_arm9.S =================================================================== --- stable/10/sys/arm/arm/cpufunc_asm_arm9.S (revision 269795) +++ stable/10/sys/arm/arm/cpufunc_asm_arm9.S (revision 269796) @@ -1,263 +1,263 @@ /* $NetBSD: cpufunc_asm_arm9.S,v 1.3 2004/01/26 15:54:16 rearnsha Exp $ */ /* * Copyright (c) 2001, 2004 ARM Limited * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
The name of the company may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * ARM9 assembly functions for CPU / MMU / TLB specific operations */ #include __FBSDID("$FreeBSD$"); /* * Functions to set the MMU Translation Table Base register * * We need to clean and flush the cache as it uses virtual * addresses that are about to change. */ ENTRY(arm9_setttb) stmfd sp!, {r0, lr} bl _C_LABEL(arm9_idcache_wbinv_all) ldmfd sp!, {r0, lr} mcr p15, 0, r0, c2, c0, 0 /* load new TTB */ mcr p15, 0, r0, c8, c7, 0 /* invalidate I+D TLBs */ mov pc, lr END(arm9_setttb) /* * TLB functions */ ENTRY(arm9_tlb_flushID_SE) mcr p15, 0, r0, c8, c6, 1 /* flush D tlb single entry */ mcr p15, 0, r0, c8, c5, 1 /* flush I tlb single entry */ mov pc, lr END(arm9_tlb_flushID_SE) /* * Cache operations. For the entire cache we use the set/index * operations. 
*/ s_max .req r0 i_max .req r1 s_inc .req r2 i_inc .req r3 ENTRY_NP(arm9_icache_sync_range) ldr ip, .Larm9_line_size cmp r1, #0x4000 bcs .Larm9_icache_sync_all ldr ip, [ip] sub r3, ip, #1 and r2, r0, r3 add r1, r1, r2 bic r0, r0, r3 .Larm9_sync_next: mcr p15, 0, r0, c7, c5, 1 /* Invalidate I cache SE with VA */ mcr p15, 0, r0, c7, c10, 1 /* Clean D cache SE with VA */ add r0, r0, ip subs r1, r1, ip bhi .Larm9_sync_next mov pc, lr END(arm9_icache_sync_range) ENTRY_NP(arm9_icache_sync_all) .Larm9_icache_sync_all: /* * We assume that the code here can never be out of sync with the * dcache, so that we can safely flush the Icache and fall through * into the Dcache cleaning code. */ mcr p15, 0, r0, c7, c5, 0 /* Flush I cache */ /* Fall through to clean Dcache. */ .Larm9_dcache_wb: ldr ip, .Larm9_cache_data ldmia ip, {s_max, i_max, s_inc, i_inc} .Lnext_set: orr ip, s_max, i_max .Lnext_index: mcr p15, 0, ip, c7, c10, 2 /* Clean D cache SE with Set/Index */ subs ip, ip, i_inc bhs .Lnext_index /* Next index */ subs s_max, s_max, s_inc bhs .Lnext_set /* Next set */ mov pc, lr END(arm9_icache_sync_all) .Larm9_line_size: .word _C_LABEL(arm_pdcache_line_size) ENTRY(arm9_dcache_wb_range) ldr ip, .Larm9_line_size cmp r1, #0x4000 bcs .Larm9_dcache_wb ldr ip, [ip] sub r3, ip, #1 and r2, r0, r3 add r1, r1, r2 bic r0, r0, r3 .Larm9_wb_next: mcr p15, 0, r0, c7, c10, 1 /* Clean D cache SE with VA */ add r0, r0, ip subs r1, r1, ip bhi .Larm9_wb_next mov pc, lr END(arm9_dcache_wb_range) ENTRY(arm9_dcache_wbinv_range) ldr ip, .Larm9_line_size cmp r1, #0x4000 bcs .Larm9_dcache_wbinv_all ldr ip, [ip] sub r3, ip, #1 and r2, r0, r3 add r1, r1, r2 bic r0, r0, r3 .Larm9_wbinv_next: mcr p15, 0, r0, c7, c14, 1 /* Purge D cache SE with VA */ add r0, r0, ip subs r1, r1, ip bhi .Larm9_wbinv_next mov pc, lr END(arm9_dcache_wbinv_range) /* * Note, we must not invalidate everything. If the range is too big we * must use wb-inv of the entire cache. 
*/ ENTRY(arm9_dcache_inv_range) ldr ip, .Larm9_line_size cmp r1, #0x4000 bcs .Larm9_dcache_wbinv_all ldr ip, [ip] sub r3, ip, #1 and r2, r0, r3 add r1, r1, r2 bic r0, r0, r3 .Larm9_inv_next: mcr p15, 0, r0, c7, c6, 1 /* Invalidate D cache SE with VA */ add r0, r0, ip subs r1, r1, ip bhi .Larm9_inv_next mov pc, lr END(arm9_dcache_inv_range) ENTRY(arm9_idcache_wbinv_range) ldr ip, .Larm9_line_size cmp r1, #0x4000 bcs .Larm9_idcache_wbinv_all ldr ip, [ip] sub r3, ip, #1 and r2, r0, r3 add r1, r1, r2 bic r0, r0, r3 .Larm9_id_wbinv_next: mcr p15, 0, r0, c7, c5, 1 /* Invalidate I cache SE with VA */ mcr p15, 0, r0, c7, c14, 1 /* Purge D cache SE with VA */ add r0, r0, ip subs r1, r1, ip bhi .Larm9_id_wbinv_next mov pc, lr END(arm9_idcache_wbinv_range) ENTRY_NP(arm9_idcache_wbinv_all) .Larm9_idcache_wbinv_all: /* * We assume that the code here can never be out of sync with the * dcache, so that we can safely flush the Icache and fall through * into the Dcache purging code. */ mcr p15, 0, r0, c7, c5, 0 /* Flush I cache */ /* Fall through */ -ENTRY(arm9_dcache_wbinv_all) +EENTRY(arm9_dcache_wbinv_all) .Larm9_dcache_wbinv_all: ldr ip, .Larm9_cache_data ldmia ip, {s_max, i_max, s_inc, i_inc} .Lnext_set_inv: orr ip, s_max, i_max .Lnext_index_inv: mcr p15, 0, ip, c7, c14, 2 /* Purge D cache SE with Set/Index */ subs ip, ip, i_inc bhs .Lnext_index_inv /* Next index */ subs s_max, s_max, s_inc bhs .Lnext_set_inv /* Next set */ mov pc, lr +EEND(arm9_dcache_wbinv_all) END(arm9_idcache_wbinv_all) -END(arm9_dcache_wbinv_all) .Larm9_cache_data: .word _C_LABEL(arm9_dcache_sets_max) /* * Context switch. * * These is the CPU-specific parts of the context switcher cpu_switch() * These functions actually perform the TTB reload. * * NOTE: Special calling convention * r1, r4-r13 must be preserved */ ENTRY(arm9_context_switch) /* * We can assume that the caches will only contain kernel addresses * at this point. So no need to flush them again. 
*/ mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ mcr p15, 0, r0, c2, c0, 0 /* set the new TTB */ mcr p15, 0, r0, c8, c7, 0 /* and flush the I+D tlbs */ /* Paranoia -- make sure the pipeline is empty. */ nop nop nop mov pc, lr END(arm9_context_switch) .bss /* XXX The following macros should probably be moved to asm.h */ #define _DATA_OBJECT(x) .globl x; .type x,_ASM_TYPE_OBJECT; x: #define C_OBJECT(x) _DATA_OBJECT(_C_LABEL(x)) /* * Parameters for the cache cleaning code. Note that the order of these * four variables is assumed in the code above. Hence the reason for * declaring them in the assembler file. */ .align 0 C_OBJECT(arm9_dcache_sets_max) .space 4 C_OBJECT(arm9_dcache_index_max) .space 4 C_OBJECT(arm9_dcache_sets_inc) .space 4 C_OBJECT(arm9_dcache_index_inc) .space 4 Index: stable/10/sys/arm/arm/cpufunc_asm_armv5.S =================================================================== --- stable/10/sys/arm/arm/cpufunc_asm_armv5.S (revision 269795) +++ stable/10/sys/arm/arm/cpufunc_asm_armv5.S (revision 269796) @@ -1,247 +1,248 @@ /* $NetBSD: cpufunc_asm_armv5.S,v 1.3 2007/01/06 00:50:54 christos Exp $ */ /* * Copyright (c) 2002, 2005 ARM Limited * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the company may not be used to endorse or promote * products derived from this software without specific prior written * permission. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * ARMv5 assembly functions for manipulating caches. * These routines can be used by any core that supports the set/index * operations. */ #include __FBSDID("$FreeBSD$"); /* * Functions to set the MMU Translation Table Base register * * We need to clean and flush the cache as it uses virtual * addresses that are about to change. */ ENTRY(armv5_setttb) stmfd sp!, {r0, lr} bl _C_LABEL(armv5_idcache_wbinv_all) ldmfd sp!, {r0, lr} mcr p15, 0, r0, c2, c0, 0 /* load new TTB */ mcr p15, 0, r0, c8, c7, 0 /* invalidate I+D TLBs */ RET END(armv5_setttb) /* * Cache operations. For the entire cache we use the set/index * operations. 
*/ s_max .req r0 i_max .req r1 s_inc .req r2 i_inc .req r3 ENTRY_NP(armv5_icache_sync_range) ldr ip, .Larmv5_line_size cmp r1, #0x4000 bcs .Larmv5_icache_sync_all ldr ip, [ip] sub r1, r1, #1 /* Don't overrun */ sub r3, ip, #1 and r2, r0, r3 add r1, r1, r2 bic r0, r0, r3 1: mcr p15, 0, r0, c7, c5, 1 /* Invalidate I cache SE with VA */ mcr p15, 0, r0, c7, c10, 1 /* Clean D cache SE with VA */ add r0, r0, ip subs r1, r1, ip bpl 1b mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ RET END(armv5_icache_sync_range) ENTRY_NP(armv5_icache_sync_all) .Larmv5_icache_sync_all: /* * We assume that the code here can never be out of sync with the * dcache, so that we can safely flush the Icache and fall through * into the Dcache cleaning code. */ mcr p15, 0, r0, c7, c5, 0 /* Flush I cache */ /* Fall through to clean Dcache. */ .Larmv5_dcache_wb: ldr ip, .Larmv5_cache_data ldmia ip, {s_max, i_max, s_inc, i_inc} 1: orr ip, s_max, i_max 2: mcr p15, 0, ip, c7, c10, 2 /* Clean D cache SE with Set/Index */ sub ip, ip, i_inc tst ip, i_max /* Index 0 is last one */ bne 2b /* Next index */ mcr p15, 0, ip, c7, c10, 2 /* Clean D cache SE with Set/Index */ subs s_max, s_max, s_inc bpl 1b /* Next set */ mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ RET END(armv5_icache_sync_all) .Larmv5_line_size: .word _C_LABEL(arm_pdcache_line_size) ENTRY(armv5_dcache_wb_range) ldr ip, .Larmv5_line_size cmp r1, #0x4000 bcs .Larmv5_dcache_wb ldr ip, [ip] sub r1, r1, #1 /* Don't overrun */ sub r3, ip, #1 and r2, r0, r3 add r1, r1, r2 bic r0, r0, r3 1: mcr p15, 0, r0, c7, c10, 1 /* Clean D cache SE with VA */ add r0, r0, ip subs r1, r1, ip bpl 1b mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ RET END(armv5_dcache_wb_range) ENTRY(armv5_dcache_wbinv_range) ldr ip, .Larmv5_line_size cmp r1, #0x4000 bcs .Larmv5_dcache_wbinv_all ldr ip, [ip] sub r1, r1, #1 /* Don't overrun */ sub r3, ip, #1 and r2, r0, r3 add r1, r1, r2 bic r0, r0, r3 1: mcr p15, 0, r0, c7, c14, 1 /* Purge D cache SE with 
VA */ add r0, r0, ip subs r1, r1, ip bpl 1b mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ RET END(armv5_dcache_wbinv_range) /* * Note, we must not invalidate everything. If the range is too big we * must use wb-inv of the entire cache. */ ENTRY(armv5_dcache_inv_range) ldr ip, .Larmv5_line_size cmp r1, #0x4000 bcs .Larmv5_dcache_wbinv_all ldr ip, [ip] sub r1, r1, #1 /* Don't overrun */ sub r3, ip, #1 and r2, r0, r3 add r1, r1, r2 bic r0, r0, r3 1: mcr p15, 0, r0, c7, c6, 1 /* Invalidate D cache SE with VA */ add r0, r0, ip subs r1, r1, ip bpl 1b mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ RET END(armv5_dcache_inv_range) ENTRY(armv5_idcache_wbinv_range) ldr ip, .Larmv5_line_size cmp r1, #0x4000 bcs .Larmv5_idcache_wbinv_all ldr ip, [ip] sub r1, r1, #1 /* Don't overrun */ sub r3, ip, #1 and r2, r0, r3 add r1, r1, r2 bic r0, r0, r3 1: mcr p15, 0, r0, c7, c5, 1 /* Invalidate I cache SE with VA */ mcr p15, 0, r0, c7, c14, 1 /* Purge D cache SE with VA */ add r0, r0, ip subs r1, r1, ip bpl 1b mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ RET END(armv5_idcache_wbinv_range) ENTRY_NP(armv5_idcache_wbinv_all) +armv5_idcache_wbinv_all: .Larmv5_idcache_wbinv_all: /* * We assume that the code here can never be out of sync with the * dcache, so that we can safely flush the Icache and fall through * into the Dcache purging code. */ mcr p15, 0, r0, c7, c5, 0 /* Flush I cache */ /* Fall through to purge Dcache. 
*/ -ENTRY(armv5_dcache_wbinv_all) +EENTRY(armv5_dcache_wbinv_all) .Larmv5_dcache_wbinv_all: ldr ip, .Larmv5_cache_data ldmia ip, {s_max, i_max, s_inc, i_inc} 1: orr ip, s_max, i_max 2: mcr p15, 0, ip, c7, c14, 2 /* Purge D cache SE with Set/Index */ sub ip, ip, i_inc tst ip, i_max /* Index 0 is last one */ bne 2b /* Next index */ mcr p15, 0, ip, c7, c14, 2 /* Purge D cache SE with Set/Index */ subs s_max, s_max, s_inc bpl 1b /* Next set */ mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ RET +EEND(armv5_dcache_wbinv_all) END(armv5_idcache_wbinv_all) -END(armv5_dcache_wbinv_all) .Larmv5_cache_data: .word _C_LABEL(armv5_dcache_sets_max) .bss /* XXX The following macros should probably be moved to asm.h */ #define _DATA_OBJECT(x) .globl x; .type x,_ASM_TYPE_OBJECT; x: #define C_OBJECT(x) _DATA_OBJECT(_C_LABEL(x)) /* * Parameters for the cache cleaning code. Note that the order of these * four variables is assumed in the code above. Hence the reason for * declaring them in the assembler file. */ .align 0 C_OBJECT(armv5_dcache_sets_max) .space 4 C_OBJECT(armv5_dcache_index_max) .space 4 C_OBJECT(armv5_dcache_sets_inc) .space 4 C_OBJECT(armv5_dcache_index_inc) .space 4 Index: stable/10/sys/arm/arm/cpufunc_asm_armv6.S =================================================================== --- stable/10/sys/arm/arm/cpufunc_asm_armv6.S (revision 269795) +++ stable/10/sys/arm/arm/cpufunc_asm_armv6.S (revision 269796) @@ -1,152 +1,152 @@ /* $NetBSD: cpufunc_asm_armv6.S,v 1.4 2010/12/10 02:06:22 bsh Exp $ */ /* * Copyright (c) 2002, 2005 ARM Limited * Portions Copyright (c) 2007 Microsoft * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the company may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * ARMv6 assembly functions for manipulating caches. * These routines can be used by any core that supports the mcrr address * range operations. */ /* * $FreeBSD$ */ #include .arch armv6 /* * Functions to set the MMU Translation Table Base register * * We need to clean and flush the cache as it uses virtual * addresses that are about to change. */ ENTRY(armv6_setttb) mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ mcr p15, 0, r0, c2, c0, 0 /* load new TTB */ mcr p15, 0, r0, c8, c7, 0 /* invalidate I+D TLBs */ RET END(armv6_setttb) /* * Cache operations. 
*/ /* LINTSTUB: void armv6_icache_sync_range(vaddr_t, vsize_t); */ /* In: r0 = start VA, r1 = length. The first two instructions rewrite r1 to the inclusive end VA (r0 + r1 - 1), which is then handed to the range operations together with r0. */ ENTRY_NP(armv6_icache_sync_range) add r1, r1, r0 sub r1, r1, #1 mcrr p15, 0, r1, r0, c5 /* invalidate I cache range */ mcrr p15, 0, r1, r0, c12 /* clean D cache range */ mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ RET END(armv6_icache_sync_range) /* LINTSTUB: void armv6_icache_sync_all(void); */ /* Whole-cache variant: no arguments are read; r0 is written to CP15 as-is. */ ENTRY_NP(armv6_icache_sync_all) /* * We assume that the code here can never be out of sync with the * dcache, so that we can safely flush the Icache and fall through * into the Dcache cleaning code. */ mcr p15, 0, r0, c7, c5, 0 /* Flush I cache */ mcr p15, 0, r0, c7, c10, 0 /* Clean D cache */ mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ RET END(armv6_icache_sync_all) /* LINTSTUB: void armv6_dcache_wb_range(vaddr_t, vsize_t); */ /* In: r0 = start VA, r1 = length; r1 becomes the inclusive end VA (r0 + r1 - 1). */ ENTRY(armv6_dcache_wb_range) add r1, r1, r0 sub r1, r1, #1 mcrr p15, 0, r1, r0, c12 /* clean D cache range */ mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ RET END(armv6_dcache_wb_range) /* LINTSTUB: void armv6_dcache_wbinv_range(vaddr_t, vsize_t); */ /* In: r0 = start VA, r1 = length; r1 becomes the inclusive end VA (r0 + r1 - 1). */ ENTRY(armv6_dcache_wbinv_range) add r1, r1, r0 sub r1, r1, #1 mcrr p15, 0, r1, r0, c14 /* clean and invalidate D cache range */ mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ RET END(armv6_dcache_wbinv_range) /* * Note, we must not invalidate everything. If the range is too big we * must use wb-inv of the entire cache.
* * LINTSTUB: void armv6_dcache_inv_range(vaddr_t, vsize_t); */ /* In: r0 = start VA, r1 = length; r1 becomes the inclusive end VA (r0 + r1 - 1). NOTE(review): the size caveat in the preceding comment is not implemented in this routine -- there is no length threshold here and the range operation is always used; confirm whether the comment is a leftover from the set/index variants. */ ENTRY(armv6_dcache_inv_range) add r1, r1, r0 sub r1, r1, #1 mcrr p15, 0, r1, r0, c6 /* invalidate D cache range */ mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ RET END(armv6_dcache_inv_range) /* LINTSTUB: void armv6_idcache_wbinv_range(vaddr_t, vsize_t); */ /* In: r0 = start VA, r1 = length; r1 becomes the inclusive end VA (r0 + r1 - 1). */ ENTRY(armv6_idcache_wbinv_range) add r1, r1, r0 sub r1, r1, #1 mcrr p15, 0, r1, r0, c5 /* invalidate I cache range */ mcrr p15, 0, r1, r0, c14 /* clean & invalidate D cache range */ mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ RET END(armv6_idcache_wbinv_range) /* LINTSTUB: void armv6_idcache_wbinv_all(void); */ ENTRY_NP(armv6_idcache_wbinv_all) /* * We assume that the code here can never be out of sync with the * dcache, so that we can safely flush the Icache and fall through * into the Dcache purging code. */ mcr p15, 0, r0, c7, c5, 0 /* Flush I cache */ /* Fall through to purge Dcache. */ /* LINTSTUB: void armv6_dcache_wbinv_all(void); */ -ENTRY(armv6_dcache_wbinv_all) +EENTRY(armv6_dcache_wbinv_all) mcr p15, 0, r0, c7, c14, 0 /* clean & invalidate D cache */ mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ RET +EEND(armv6_dcache_wbinv_all) END(armv6_idcache_wbinv_all) -END(armv6_dcache_wbinv_all) ENTRY(armv6_idcache_inv_all) mov r0, #0 mcr p15, 0, r0, c7, c7, 0 /* invalidate all I+D cache */ RET END(armv6_idcache_inv_all) Index: stable/10/sys/arm/arm/cpufunc_asm_armv7.S =================================================================== --- stable/10/sys/arm/arm/cpufunc_asm_armv7.S (revision 269795) +++ stable/10/sys/arm/arm/cpufunc_asm_armv7.S (revision 269796) @@ -1,368 +1,368 @@ /*- * Copyright (c) 2010 Per Odlund * Copyright (C) 2011 MARVELL INTERNATIONAL LTD. * All rights reserved. * * Developed by Semihalf. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1.
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of MARVELL nor the names of contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); .cpu cortex-a8 .Lcoherency_level: .word _C_LABEL(arm_cache_loc) .Lcache_type: .word _C_LABEL(arm_cache_type) .Lway_mask: .word 0x3ff .Lmax_index: .word 0x7fff .Lpage_mask: .word 0xfff #define PT_NOS (1 << 5) #define PT_S (1 << 1) #define PT_INNER_NC 0 #define PT_INNER_WT (1 << 0) #define PT_INNER_WB ((1 << 0) | (1 << 6)) #define PT_INNER_WBWA (1 << 6) #define PT_OUTER_NC 0 #define PT_OUTER_WT (2 << 3) #define PT_OUTER_WB (3 << 3) #define PT_OUTER_WBWA (1 << 3) #ifdef SMP #define PT_ATTR (PT_S|PT_INNER_WBWA|PT_OUTER_WBWA|PT_NOS) #else #define PT_ATTR (PT_INNER_WBWA|PT_OUTER_WBWA) #endif ENTRY(armv7_setttb) stmdb sp!, {r0, lr} bl _C_LABEL(armv7_idcache_wbinv_all) /* clean the D cache */ ldmia sp!, {r0, lr} dsb orr r0, r0, #PT_ATTR mcr p15, 0, r0, c2, c0, 0 /* Translation Table Base Register 0 (TTBR0) */ isb #ifdef SMP mcr p15, 0, r0, c8, c3, 0 /* invalidate I+D TLBs Inner Shareable*/ #else mcr p15, 0, r0, c8, c7, 0 /* invalidate I+D TLBs */ #endif dsb isb RET END(armv7_setttb) ENTRY(armv7_tlb_flushID) dsb #ifdef SMP mcr p15, 0, r0, c8, c3, 0 /* flush Unified TLB all entries Inner Shareable */ mcr p15, 0, r0, c7, c1, 6 /* flush BTB Inner Shareable */ #else mcr p15, 0, r0, c8, c7, 0 /* flush Unified TLB all entries */ mcr p15, 0, r0, c7, c5, 6 /* flush BTB */ #endif dsb isb mov pc, lr END(armv7_tlb_flushID) ENTRY(armv7_tlb_flushID_SE) ldr r1, .Lpage_mask bic r0, r0, r1 #ifdef SMP mcr p15, 0, r0, c8, c3, 3 /* flush Unified TLB single entry Inner Shareable */ mcr p15, 0, r0, c7, c1, 6 /* flush BTB Inner Shareable */ #else mcr p15, 0, r0, c8, c7, 1 /* flush Unified TLB single entry */ mcr p15, 0, r0, c7, c5, 6 /* flush BTB */ #endif dsb isb mov pc, lr END(armv7_tlb_flushID_SE) /* Based on algorithm from ARM Architecture Reference Manual */ ENTRY(armv7_dcache_wbinv_all) stmdb sp!, {r4, r5, r6, r7, r8, r9} /* Get cache level */ ldr r0, .Lcoherency_level ldr r3, [r0] cmp r3, #0 beq Finished /* For each cache level */ mov r8, #0 Loop1: /* 
Get cache type for given level */ mov r2, r8, lsl #2 add r2, r2, r2 ldr r0, .Lcache_type ldr r1, [r0, r2] /* Get line size */ and r2, r1, #7 add r2, r2, #4 /* Get number of ways */ ldr r4, .Lway_mask ands r4, r4, r1, lsr #3 clz r5, r4 /* Get max index */ ldr r7, .Lmax_index ands r7, r7, r1, lsr #13 Loop2: mov r9, r4 Loop3: mov r6, r8, lsl #1 orr r6, r6, r9, lsl r5 orr r6, r6, r7, lsl r2 /* Clean and invalidate data cache by way/index */ mcr p15, 0, r6, c7, c14, 2 subs r9, r9, #1 bge Loop3 subs r7, r7, #1 bge Loop2 Skip: add r8, r8, #1 cmp r3, r8 bne Loop1 Finished: dsb ldmia sp!, {r4, r5, r6, r7, r8, r9} RET END(armv7_dcache_wbinv_all) ENTRY(armv7_idcache_wbinv_all) stmdb sp!, {lr} bl armv7_dcache_wbinv_all #ifdef SMP mcr p15, 0, r0, c7, c1, 0 /* Invalidate all I caches to PoU (ICIALLUIS) */ #else mcr p15, 0, r0, c7, c5, 0 /* Invalidate all I caches to PoU (ICIALLU) */ #endif dsb isb ldmia sp!, {lr} RET END(armv7_idcache_wbinv_all) /* XXX Temporary set it to 32 for MV cores, however this value should be * get from Cache Type register */ .Larmv7_line_size: .word 32 ENTRY(armv7_dcache_wb_range) ldr ip, .Larmv7_line_size sub r3, ip, #1 and r2, r0, r3 add r1, r1, r2 bic r0, r0, r3 .Larmv7_wb_next: mcr p15, 0, r0, c7, c10, 1 /* Clean D cache SE with VA */ add r0, r0, ip subs r1, r1, ip bhi .Larmv7_wb_next dsb /* data synchronization barrier */ RET END(armv7_dcache_wb_range) ENTRY(armv7_dcache_wbinv_range) ldr ip, .Larmv7_line_size sub r3, ip, #1 and r2, r0, r3 add r1, r1, r2 bic r0, r0, r3 .Larmv7_wbinv_next: mcr p15, 0, r0, c7, c14, 1 /* Purge D cache SE with VA */ add r0, r0, ip subs r1, r1, ip bhi .Larmv7_wbinv_next dsb /* data synchronization barrier */ RET END(armv7_dcache_wbinv_range) /* * Note, we must not invalidate everything. If the range is too big we * must use wb-inv of the entire cache. 
*/ ENTRY(armv7_dcache_inv_range) ldr ip, .Larmv7_line_size sub r3, ip, #1 and r2, r0, r3 add r1, r1, r2 bic r0, r0, r3 .Larmv7_inv_next: mcr p15, 0, r0, c7, c6, 1 /* Invalidate D cache SE with VA */ add r0, r0, ip subs r1, r1, ip bhi .Larmv7_inv_next dsb /* data synchronization barrier */ RET END(armv7_dcache_inv_range) ENTRY(armv7_idcache_wbinv_range) ldr ip, .Larmv7_line_size sub r3, ip, #1 and r2, r0, r3 add r1, r1, r2 bic r0, r0, r3 .Larmv7_id_wbinv_next: mcr p15, 0, r0, c7, c5, 1 /* Invalidate I cache SE with VA */ mcr p15, 0, r0, c7, c14, 1 /* Purge D cache SE with VA */ add r0, r0, ip subs r1, r1, ip bhi .Larmv7_id_wbinv_next isb /* instruction synchronization barrier */ dsb /* data synchronization barrier */ RET END(armv7_idcache_wbinv_range) ENTRY_NP(armv7_icache_sync_all) #ifdef SMP mcr p15, 0, r0, c7, c1, 0 /* Invalidate all I cache to PoU Inner Shareable */ #else mcr p15, 0, r0, c7, c5, 0 /* Invalidate all I cache to PoU (ICIALLU) */ #endif isb /* instruction synchronization barrier */ dsb /* data synchronization barrier */ RET END(armv7_icache_sync_all) ENTRY_NP(armv7_icache_sync_range) ldr ip, .Larmv7_line_size .Larmv7_sync_next: mcr p15, 0, r0, c7, c5, 1 /* Invalidate I cache SE with VA */ mcr p15, 0, r0, c7, c10, 1 /* Clean D cache SE with VA */ add r0, r0, ip subs r1, r1, ip bhi .Larmv7_sync_next isb /* instruction synchronization barrier */ dsb /* data synchronization barrier */ RET END(armv7_icache_sync_range) ENTRY(armv7_cpu_sleep) dsb /* data synchronization barrier */ wfi /* wait for interrupt */ RET END(armv7_cpu_sleep) ENTRY(armv7_context_switch) dsb orr r0, r0, #PT_ATTR mcr p15, 0, r0, c2, c0, 0 /* set the new TTB */ isb #ifdef SMP mcr p15, 0, r0, c8, c3, 0 /* and flush the I+D tlbs Inner Sharable */ #else mcr p15, 0, r0, c8, c7, 0 /* and flush the I+D tlbs */ #endif dsb isb RET END(armv7_context_switch) ENTRY(armv7_drain_writebuf) dsb RET END(armv7_drain_writebuf) ENTRY(armv7_sev) dsb sev nop RET END(armv7_sev) ENTRY(armv7_auxctrl) mrc 
p15, 0, r2, c1, c0, 1 bic r3, r2, r0 /* Clear bits */ eor r3, r3, r1 /* XOR bits */ teq r2, r3 mcrne p15, 0, r3, c1, c0, 1 mov r0, r2 RET END(armv7_auxctrl) /* * Invalidate all I+D+branch cache. Used by startup code, which counts * on the fact that only r0-r3,ip are modified and no stack space is used. */ ENTRY(armv7_idcache_inv_all) mov r0, #0 mcr p15, 2, r0, c0, c0, 0 @ set cache level to L1 mrc p15, 1, r0, c0, c0, 0 @ read CCSIDR ubfx r2, r0, #13, #15 @ get num sets - 1 from CCSIDR ubfx r3, r0, #3, #10 @ get numways - 1 from CCSIDR clz r1, r3 @ number of bits to MSB of way lsl r3, r3, r1 @ shift into position mov ip, #1 @ lsl ip, ip, r1 @ ip now contains the way decr ubfx r0, r0, #0, #3 @ get linesize from CCSIDR add r0, r0, #4 @ apply bias lsl r2, r2, r0 @ shift sets by log2(linesize) add r3, r3, r2 @ merge numsets - 1 with numways - 1 sub ip, ip, r2 @ subtract numsets - 1 from way decr mov r1, #1 lsl r1, r1, r0 @ r1 now contains the set decr mov r2, ip @ r2 now contains set way decr /* r3 = ways/sets, r2 = way decr, r1 = set decr, r0 and ip are free */ 1: mcr p15, 0, r3, c7, c6, 2 @ invalidate line movs r0, r3 @ get current way/set beq 2f @ at 0 means we are done. movs r0, r0, lsl #10 @ clear way bits leaving only set bits subne r3, r3, r1 @ non-zero?, decrement set # subeq r3, r3, r2 @ zero?, decrement way # and restore set count b 1b 2: dsb @ wait for stores to finish mov r0, #0 @ and ... 
mcr p15, 0, r0, c7, c5, 0 @ invalidate instruction+branch cache isb @ instruction sync barrier bx lr @ return -END(armv7_l1cache_inv_all) +END(armv7_idcache_inv_all) ENTRY_NP(armv7_sleep) dsb wfi bx lr END(armv7_sleep) Index: stable/10/sys/arm/arm/cpufunc_asm_xscale.S =================================================================== --- stable/10/sys/arm/arm/cpufunc_asm_xscale.S (revision 269795) +++ stable/10/sys/arm/arm/cpufunc_asm_xscale.S (revision 269796) @@ -1,522 +1,523 @@ /* $NetBSD: cpufunc_asm_xscale.S,v 1.16 2002/08/17 16:36:32 thorpej Exp $ */ /*- * Copyright (c) 2001, 2002 Wasabi Systems, Inc. * All rights reserved. * * Written by Allen Briggs and Jason R. Thorpe for Wasabi Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed for the NetBSD Project by * Wasabi Systems, Inc. * 4. The name of Wasabi Systems, Inc. may not be used to endorse * or promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL WASABI SYSTEMS, INC * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * */ /*- * Copyright (c) 2001 Matt Thomas. * Copyright (c) 1997,1998 Mark Brinicombe. * Copyright (c) 1997 Causality Limited * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Causality Limited. * 4. The name of Causality Limited may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY CAUSALITY LIMITED ``AS IS'' AND ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL CAUSALITY LIMITED BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * XScale assembly functions for CPU / MMU / TLB specific operations */ #include __FBSDID("$FreeBSD$"); /* * Size of the XScale core D-cache. */ #define DCACHE_SIZE 0x00008000 .Lblock_userspace_access: .word _C_LABEL(block_userspace_access) /* * CPWAIT -- Canonical method to wait for CP15 update. * From: Intel 80200 manual, section 2.3.3. * * NOTE: Clobbers the specified temp reg. */ #define CPWAIT_BRANCH \ sub pc, pc, #4 #define CPWAIT(tmp) \ mrc p15, 0, tmp, c2, c0, 0 /* arbitrary read of CP15 */ ;\ mov tmp, tmp /* wait for it to complete */ ;\ CPWAIT_BRANCH /* branch to next insn */ #define CPWAIT_AND_RETURN_SHIFTER lsr #32 #define CPWAIT_AND_RETURN(tmp) \ mrc p15, 0, tmp, c2, c0, 0 /* arbitrary read of CP15 */ ;\ /* Wait for it to complete and branch to the return address */ \ sub pc, lr, tmp, CPWAIT_AND_RETURN_SHIFTER ENTRY(xscale_cpwait) CPWAIT_AND_RETURN(r0) END(xscale_cpwait) /* * We need a separate cpu_control() entry point, since we have to * invalidate the Branch Target Buffer in the event the BPRD bit * changes in the control register. 
*/ ENTRY(xscale_control) mrc p15, 0, r3, c1, c0, 0 /* Read the control register */ bic r2, r3, r0 /* Clear bits */ eor r2, r2, r1 /* XOR bits */ teq r2, r3 /* Only write if there was a change */ mcrne p15, 0, r0, c7, c5, 6 /* Invalidate the BTB */ mcrne p15, 0, r2, c1, c0, 0 /* Write new control register */ mov r0, r3 /* Return old value */ CPWAIT_AND_RETURN(r1) END(xscale_control) /* * Functions to set the MMU Translation Table Base register * * We need to clean and flush the cache as it uses virtual * addresses that are about to change. */ ENTRY(xscale_setttb) #ifdef CACHE_CLEAN_BLOCK_INTR mrs r3, cpsr orr r1, r3, #(I32_bit | F32_bit) msr cpsr_fsxc, r1 #else ldr r3, .Lblock_userspace_access ldr r2, [r3] orr r1, r2, #1 str r1, [r3] #endif stmfd sp!, {r0-r3, lr} bl _C_LABEL(xscale_cache_cleanID) mcr p15, 0, r0, c7, c5, 0 /* invalidate I$ and BTB */ mcr p15, 0, r0, c7, c10, 4 /* drain write and fill buffer */ CPWAIT(r0) ldmfd sp!, {r0-r3, lr} /* Write the TTB */ mcr p15, 0, r0, c2, c0, 0 /* If we have updated the TTB we must flush the TLB */ mcr p15, 0, r0, c8, c7, 0 /* invalidate I+D TLB */ /* The cleanID above means we only need to flush the I cache here */ mcr p15, 0, r0, c7, c5, 0 /* invalidate I$ and BTB */ CPWAIT(r0) #ifdef CACHE_CLEAN_BLOCK_INTR msr cpsr_fsxc, r3 #else str r2, [r3] #endif RET END(xscale_setttb) /* * TLB functions * */ ENTRY(xscale_tlb_flushID_SE) mcr p15, 0, r0, c8, c6, 1 /* flush D tlb single entry */ mcr p15, 0, r0, c8, c5, 1 /* flush I tlb single entry */ CPWAIT_AND_RETURN(r0) END(xscale_tlb_flushID_SE) /* * Cache functions */ ENTRY(xscale_cache_flushID) mcr p15, 0, r0, c7, c7, 0 /* flush I+D cache */ CPWAIT_AND_RETURN(r0) END(xscale_cache_flushID) ENTRY(xscale_cache_flushI) mcr p15, 0, r0, c7, c5, 0 /* flush I cache */ CPWAIT_AND_RETURN(r0) END(xscale_cache_flushI) ENTRY(xscale_cache_flushD) mcr p15, 0, r0, c7, c6, 0 /* flush D cache */ CPWAIT_AND_RETURN(r0) END(xscale_cache_flushD) ENTRY(xscale_cache_flushI_SE) mcr p15, 0, r0, c7, c5, 1 
/* flush I cache single entry */ CPWAIT_AND_RETURN(r0) END(xscale_cache_flushI_SE) ENTRY(xscale_cache_flushD_SE) /* * Errata (rev < 2): Must clean-dcache-line to an address * before invalidate-dcache-line to an address, or dirty * bits will not be cleared in the dcache array. */ mcr p15, 0, r0, c7, c10, 1 mcr p15, 0, r0, c7, c6, 1 /* flush D cache single entry */ CPWAIT_AND_RETURN(r0) END(xscale_cache_flushD_SE) ENTRY(xscale_cache_cleanD_E) mcr p15, 0, r0, c7, c10, 1 /* clean D cache entry */ CPWAIT_AND_RETURN(r0) END(xscale_cache_cleanD_E) /* * Information for the XScale cache clean/purge functions: * * * Virtual address of the memory region to use * * Size of memory region * * Note the virtual address for the Data cache clean operation * does not need to be backed by physical memory, since no loads * will actually be performed by the allocate-line operation. * * Note that the Mini-Data cache MUST be cleaned by executing * loads from memory mapped into a region reserved exclusively * for cleaning of the Mini-Data cache. 
*/ .data .global _C_LABEL(xscale_cache_clean_addr) _C_LABEL(xscale_cache_clean_addr): .word 0x00000000 .global _C_LABEL(xscale_cache_clean_size) _C_LABEL(xscale_cache_clean_size): .word DCACHE_SIZE .global _C_LABEL(xscale_minidata_clean_addr) _C_LABEL(xscale_minidata_clean_addr): .word 0x00000000 .global _C_LABEL(xscale_minidata_clean_size) _C_LABEL(xscale_minidata_clean_size): .word 0x00000800 .text .Lxscale_cache_clean_addr: .word _C_LABEL(xscale_cache_clean_addr) .Lxscale_cache_clean_size: .word _C_LABEL(xscale_cache_clean_size) .Lxscale_minidata_clean_addr: .word _C_LABEL(xscale_minidata_clean_addr) .Lxscale_minidata_clean_size: .word _C_LABEL(xscale_minidata_clean_size) #ifdef CACHE_CLEAN_BLOCK_INTR #define XSCALE_CACHE_CLEAN_BLOCK \ mrs r3, cpsr ; \ orr r0, r3, #(I32_bit | F32_bit) ; \ msr cpsr_fsxc, r0 #define XSCALE_CACHE_CLEAN_UNBLOCK \ msr cpsr_fsxc, r3 #else #define XSCALE_CACHE_CLEAN_BLOCK \ ldr r3, .Lblock_userspace_access ; \ ldr ip, [r3] ; \ orr r0, ip, #1 ; \ str r0, [r3] #define XSCALE_CACHE_CLEAN_UNBLOCK \ str ip, [r3] #endif /* CACHE_CLEAN_BLOCK_INTR */ #define XSCALE_CACHE_CLEAN_PROLOGUE \ XSCALE_CACHE_CLEAN_BLOCK ; \ ldr r2, .Lxscale_cache_clean_addr ; \ ldmia r2, {r0, r1} ; \ /* \ * BUG ALERT! \ * \ * The XScale core has a strange cache eviction bug, which \ * requires us to use 2x the cache size for the cache clean \ * and for that area to be aligned to 2 * cache size. \ * \ * The work-around is to use 2 areas for cache clean, and to \ * alternate between them whenever this is done. No one knows \ * why the work-around works (mmm!). 
\ */ \ eor r0, r0, #(DCACHE_SIZE) ; \ str r0, [r2] ; \ add r0, r0, r1 #define XSCALE_CACHE_CLEAN_EPILOGUE \ XSCALE_CACHE_CLEAN_UNBLOCK ENTRY_NP(xscale_cache_syncI) -ENTRY_NP(xscale_cache_purgeID) + +EENTRY_NP(xscale_cache_purgeID) mcr p15, 0, r0, c7, c5, 0 /* flush I cache (D cleaned below) */ -ENTRY_NP(xscale_cache_cleanID) -ENTRY_NP(xscale_cache_purgeD) -ENTRY(xscale_cache_cleanD) +EENTRY_NP(xscale_cache_cleanID) +EENTRY_NP(xscale_cache_purgeD) +EENTRY(xscale_cache_cleanD) XSCALE_CACHE_CLEAN_PROLOGUE 1: subs r0, r0, #32 mcr p15, 0, r0, c7, c2, 5 /* allocate cache line */ subs r1, r1, #32 bne 1b CPWAIT(r0) mcr p15, 0, r0, c7, c10, 4 /* drain write buffer */ CPWAIT(r0) XSCALE_CACHE_CLEAN_EPILOGUE RET +EEND(xscale_cache_cleanD) +EEND(xscale_cache_purgeD) +EEND(xscale_cache_cleanID) +EEND(xscale_cache_purgeID) END(xscale_cache_syncI) -END(xscale_cache_purgeID) -END(xscale_cache_cleanID) -END(xscale_cache_purgeD) -END(xscale_cache_cleanD) /* * Clean the mini-data cache. * * It's expected that we only use the mini-data cache for * kernel addresses, so there is no need to purge it on * context switch, and no need to prevent userspace access * while we clean it. 
*/ ENTRY(xscale_cache_clean_minidata) ldr r2, .Lxscale_minidata_clean_addr ldmia r2, {r0, r1} 1: ldr r3, [r0], #32 subs r1, r1, #32 bne 1b mcr p15, 0, r0, c7, c10, 4 /* drain write buffer */ CPWAIT_AND_RETURN(r1) END(xscale_cache_clean_minidata) ENTRY(xscale_cache_purgeID_E) mcr p15, 0, r0, c7, c10, 1 /* clean D cache entry */ CPWAIT(r1) mcr p15, 0, r0, c7, c10, 4 /* drain write buffer */ mcr p15, 0, r0, c7, c5, 1 /* flush I cache single entry */ mcr p15, 0, r0, c7, c6, 1 /* flush D cache single entry */ CPWAIT_AND_RETURN(r1) END(xscale_cache_purgeID_E) ENTRY(xscale_cache_purgeD_E) mcr p15, 0, r0, c7, c10, 1 /* clean D cache entry */ CPWAIT(r1) mcr p15, 0, r0, c7, c10, 4 /* drain write buffer */ mcr p15, 0, r0, c7, c6, 1 /* flush D cache single entry */ CPWAIT_AND_RETURN(r1) END(xscale_cache_purgeD_E) /* * Soft functions */ /* xscale_cache_syncI is identical to xscale_cache_purgeID */ -ENTRY(xscale_cache_cleanID_rng) +EENTRY(xscale_cache_cleanID_rng) ENTRY(xscale_cache_cleanD_rng) cmp r1, #0x4000 bcs _C_LABEL(xscale_cache_cleanID) and r2, r0, #0x1f add r1, r1, r2 bic r0, r0, #0x1f 1: mcr p15, 0, r0, c7, c10, 1 /* clean D cache entry */ add r0, r0, #32 subs r1, r1, #32 bhi 1b CPWAIT(r0) mcr p15, 0, r0, c7, c10, 4 /* drain write buffer */ CPWAIT_AND_RETURN(r0) -END(xscale_cache_cleanID_rng) +/*END(xscale_cache_cleanID_rng)*/ END(xscale_cache_cleanD_rng) ENTRY(xscale_cache_purgeID_rng) cmp r1, #0x4000 bcs _C_LABEL(xscale_cache_purgeID) and r2, r0, #0x1f add r1, r1, r2 bic r0, r0, #0x1f 1: mcr p15, 0, r0, c7, c10, 1 /* clean D cache entry */ mcr p15, 0, r0, c7, c6, 1 /* flush D cache single entry */ mcr p15, 0, r0, c7, c5, 1 /* flush I cache single entry */ add r0, r0, #32 subs r1, r1, #32 bhi 1b CPWAIT(r0) mcr p15, 0, r0, c7, c10, 4 /* drain write buffer */ CPWAIT_AND_RETURN(r0) END(xscale_cache_purgeID_rng) ENTRY(xscale_cache_purgeD_rng) cmp r1, #0x4000 bcs _C_LABEL(xscale_cache_purgeD) and r2, r0, #0x1f add r1, r1, r2 bic r0, r0, #0x1f 1: mcr p15, 0, r0, c7, c10, 1 
/* clean D cache entry */ mcr p15, 0, r0, c7, c6, 1 /* flush D cache single entry */ add r0, r0, #32 subs r1, r1, #32 bhi 1b CPWAIT(r0) mcr p15, 0, r0, c7, c10, 4 /* drain write buffer */ CPWAIT_AND_RETURN(r0) END(xscale_cache_purgeD_rng) ENTRY(xscale_cache_syncI_rng) cmp r1, #0x4000 bcs _C_LABEL(xscale_cache_syncI) and r2, r0, #0x1f add r1, r1, r2 bic r0, r0, #0x1f 1: mcr p15, 0, r0, c7, c10, 1 /* clean D cache entry */ mcr p15, 0, r0, c7, c5, 1 /* flush I cache single entry */ add r0, r0, #32 subs r1, r1, #32 bhi 1b CPWAIT(r0) mcr p15, 0, r0, c7, c10, 4 /* drain write buffer */ CPWAIT_AND_RETURN(r0) END(xscale_cache_syncI_rng) ENTRY(xscale_cache_flushD_rng) and r2, r0, #0x1f add r1, r1, r2 bic r0, r0, #0x1f 1: mcr p15, 0, r0, c7, c6, 1 /* flush D cache single entry */ add r0, r0, #32 subs r1, r1, #32 bhi 1b mcr p15, 0, r0, c7, c10, 4 /* drain write buffer */ CPWAIT_AND_RETURN(r0) END(xscale_cache_flushD_rng) /* * Context switch. * * These is the CPU-specific parts of the context switcher cpu_switch() * These functions actually perform the TTB reload. * * NOTE: Special calling convention * r1, r4-r13 must be preserved */ ENTRY(xscale_context_switch) /* * CF_CACHE_PURGE_ID will *ALWAYS* be called prior to this. * Thus the data cache will contain only kernel data and the * instruction cache will contain only kernel code, and all * kernel mappings are shared by all processes. */ /* Write the TTB */ mcr p15, 0, r0, c2, c0, 0 /* If we have updated the TTB we must flush the TLB */ mcr p15, 0, r0, c8, c7, 0 /* flush the I+D tlb */ CPWAIT_AND_RETURN(r0) END(xscale_context_switch) /* * xscale_cpu_sleep * * This is called when there is nothing on any of the run queues. * We go into IDLE mode so that any IRQ or FIQ will awaken us. * * If this is called with anything other than ARM_SLEEP_MODE_IDLE, * ignore it. 
*/ ENTRY(xscale_cpu_sleep) tst r0, #0x00000000 bne 1f mov r0, #0x1 mcr p14, 0, r0, c7, c0, 0 1: RET END(xscale_cpu_sleep) Index: stable/10/sys/arm/arm/cpufunc_asm_xscale_c3.S =================================================================== --- stable/10/sys/arm/arm/cpufunc_asm_xscale_c3.S (revision 269795) +++ stable/10/sys/arm/arm/cpufunc_asm_xscale_c3.S (revision 269796) @@ -1,415 +1,416 @@ /* $NetBSD: cpufunc_asm_xscale.S,v 1.16 2002/08/17 16:36:32 thorpej Exp $ */ /*- * Copyright (c) 2007 Olivier Houchard * Copyright (c) 2001, 2002 Wasabi Systems, Inc. * All rights reserved. * * Written by Allen Briggs and Jason R. Thorpe for Wasabi Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed for the NetBSD Project by * Wasabi Systems, Inc. * 4. The name of Wasabi Systems, Inc. may not be used to endorse * or promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL WASABI SYSTEMS, INC * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * */ /*- * Copyright (c) 2001 Matt Thomas. * Copyright (c) 1997,1998 Mark Brinicombe. * Copyright (c) 1997 Causality Limited * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Causality Limited. * 4. The name of Causality Limited may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY CAUSALITY LIMITED ``AS IS'' AND ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL CAUSALITY LIMITED BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * XScale core 3 assembly functions for CPU / MMU / TLB specific operations */ #include __FBSDID("$FreeBSD$"); /* * Size of the XScale core D-cache. */ #define DCACHE_SIZE 0x00008000 .Lblock_userspace_access: .word _C_LABEL(block_userspace_access) /* * CPWAIT -- Canonical method to wait for CP15 update. * From: Intel 80200 manual, section 2.3.3. * * NOTE: Clobbers the specified temp reg. */ #define CPWAIT_BRANCH \ sub pc, pc, #4 #define CPWAIT(tmp) \ mrc p15, 0, tmp, c2, c0, 0 /* arbitrary read of CP15 */ ;\ mov tmp, tmp /* wait for it to complete */ ;\ CPWAIT_BRANCH /* branch to next insn */ #define CPWAIT_AND_RETURN_SHIFTER lsr #32 #define CPWAIT_AND_RETURN(tmp) \ mrc p15, 0, tmp, c2, c0, 0 /* arbitrary read of CP15 */ ;\ /* Wait for it to complete and branch to the return address */ \ sub pc, lr, tmp, CPWAIT_AND_RETURN_SHIFTER #define ARM_USE_L2_CACHE #define L2_CACHE_SIZE 0x80000 #define L2_CACHE_WAYS 8 #define L2_CACHE_LINE_SIZE 32 #define L2_CACHE_SETS (L2_CACHE_SIZE / \ (L2_CACHE_WAYS * L2_CACHE_LINE_SIZE)) #define L1_DCACHE_SIZE 32 * 1024 #define L1_DCACHE_WAYS 4 #define L1_DCACHE_LINE_SIZE 32 #define L1_DCACHE_SETS (L1_DCACHE_SIZE / \ (L1_DCACHE_WAYS * L1_DCACHE_LINE_SIZE)) #ifdef CACHE_CLEAN_BLOCK_INTR #define XSCALE_CACHE_CLEAN_BLOCK \ stmfd sp!, {r4} ; \ mrs r4, cpsr ; \ orr r0, r4, #(I32_bit | F32_bit) ; \ msr cpsr_fsxc, r0 #define XSCALE_CACHE_CLEAN_UNBLOCK \ msr cpsr_fsxc, r4 ; \ ldmfd sp!, {r4} #else #define XSCALE_CACHE_CLEAN_BLOCK \ stmfd sp!, 
{r4}					;	\
	ldr	r4, .Lblock_userspace_access		;	\
	ldr	ip, [r4]				;	\
	orr	r0, ip, #1				;	\
	str	r0, [r4]

/*
 * Undo XSCALE_CACHE_CLEAN_BLOCK: put back the saved block_userspace_access
 * value (kept in ip) and pop r4.  The address lives in r4, not r3: the
 * clean loop below overwrites r3 with set/way operands, so storing through
 * r3 would write to a stray address.
 */
#define	XSCALE_CACHE_CLEAN_UNBLOCK			\
	str	ip, [r4]				;	\
	ldmfd	sp!, {r4}
#endif /* CACHE_CLEAN_BLOCK_INTR */

/*
 * Whole-cache operations.  syncI/purgeID first flush the I cache and then
 * fall into the shared D cache loop, which cleans and invalidates every
 * set of each of the 4 ways by set/way.  Clobbers r0-r3 and ip.
 */
ENTRY_NP(xscalec3_cache_syncI)
-ENTRY_NP(xscalec3_cache_purgeID)
+xscalec3_cache_purgeID:
+EENTRY_NP(xscalec3_cache_purgeID)
	mcr	p15, 0, r0, c7, c5, 0	/* flush I cache (D cleaned below) */
-ENTRY_NP(xscalec3_cache_cleanID)
-ENTRY_NP(xscalec3_cache_purgeD)
-ENTRY(xscalec3_cache_cleanD)
+EENTRY_NP(xscalec3_cache_cleanID)
+EENTRY_NP(xscalec3_cache_purgeD)
+EENTRY(xscalec3_cache_cleanD)

	XSCALE_CACHE_CLEAN_BLOCK
	mov	r0, #0			/* r0 = way index */
1:
	mov	r1, r0, asl #30		/* way goes in bits [31:30] */
	mov	r2, #0			/* r2 = set index */
2:
	orr	r3, r1, r2, asl #5	/* r3 = way | (set << 5) */
	mcr	p15, 0, r3, c7, c14, 2	/* clean and invalidate */
	add	r2, r2, #1
	cmp	r2, #L1_DCACHE_SETS
	bne	2b
	add	r0, r0, #1
	cmp	r0, #4
	bne	1b
	CPWAIT(r0)
	XSCALE_CACHE_CLEAN_UNBLOCK
	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */

	RET
+EEND(xscalec3_cache_purgeID)
+EEND(xscalec3_cache_cleanID)
+EEND(xscalec3_cache_purgeD)
+EEND(xscalec3_cache_cleanD)
END(xscalec3_cache_syncI)
-END(xscalec3_cache_purgeID)
-END(xscalec3_cache_cleanID)
-END(xscalec3_cache_purgeD)
-END(xscalec3_cache_cleanD)

/*
 * Clean+invalidate D cache lines and flush I cache lines for the range
 * r0 = start VA, r1 = length in bytes.  Ranges of 0x4000 bytes or more
 * are handed to the whole-cache purgeID routine so the I cache is flushed
 * on that path as well (cleanID only touches the D cache).
 */
ENTRY(xscalec3_cache_purgeID_rng)

	cmp	r1, #0x4000
	bcs	_C_LABEL(xscalec3_cache_purgeID)
	and	r2, r0, #0x1f		/* offset of start within its 32-byte line */
	add	r1, r1, r2		/* extend length to cover the partial first line */
	bic	r0, r0, #0x1f		/* round start down to a line boundary */

1:
	mcr	p15, 0, r0, c7, c14, 1	/* clean/invalidate L1 D cache entry */
	nop
	mcr	p15, 0, r0, c7, c5, 1	/* flush I cache single entry */
	add	r0, r0, #32
	subs	r1, r1, #32
	bhi	1b

	CPWAIT(r0)

	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */

	CPWAIT_AND_RETURN(r0)
END(xscalec3_cache_purgeID_rng)

/*
 * Clean D cache lines and flush I cache lines for the range
 * r0 = start VA, r1 = length in bytes; ranges of 0x4000 bytes or more
 * use the whole-cache syncI routine instead.
 */
ENTRY(xscalec3_cache_syncI_rng)
	cmp	r1, #0x4000
	bcs	_C_LABEL(xscalec3_cache_syncI)

	and	r2, r0, #0x1f		/* offset of start within its 32-byte line */
	add	r1, r1, r2		/* extend length to cover the partial first line */
	bic	r0, r0, #0x1f		/* round start down to a line boundary */

1:
	mcr	p15, 0, r0, c7, c10, 1	/* clean D cache entry */
	mcr	p15, 0, r0, c7, c5, 1	/* flush I cache single entry */
	add	r0, r0, #32
	subs	r1, r1, #32
	bhi	1b

	CPWAIT(r0)

	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */

	CPWAIT_AND_RETURN(r0)
END(xscalec3_cache_syncI_rng)

ENTRY(xscalec3_cache_purgeD_rng) cmp r1, #0x4000 bcs _C_LABEL(xscalec3_cache_cleanID) and r2, r0, #0x1f add r1, r1, r2 bic r0, r0, #0x1f 1: mcr p15, 0, r0, c7, c14, 1 /* Clean and invalidate D cache entry */ add r0, r0, #32 subs r1, r1, #32 bhi 1b CPWAIT(r0) mcr p15, 0, r0, c7, c10, 4 /* drain write buffer */ CPWAIT_AND_RETURN(r0) END(xscalec3_cache_purgeD_rng) ENTRY(xscalec3_cache_cleanID_rng) -ENTRY(xscalec3_cache_cleanD_rng) +EENTRY(xscalec3_cache_cleanD_rng) cmp r1, #0x4000 bcs _C_LABEL(xscalec3_cache_cleanID) and r2, r0, #0x1f add r1, r1, r2 bic r0, r0, #0x1f 1: mcr p15, 0, r0, c7, c10, 1 /* clean L1 D cache entry */ nop add r0, r0, #32 subs r1, r1, #32 bhi 1b CPWAIT(r0) mcr p15, 0, r0, c7, c10, 4 /* drain write buffer */ CPWAIT_AND_RETURN(r0) +EEND(xscalec3_cache_cleanD_rng) END(xscalec3_cache_cleanID_rng) -END(xscalec3_cache_cleanD_rng) ENTRY(xscalec3_l2cache_purge) /* Clean-up the L2 cache */ mcr p15, 0, r0, c7, c10, 5 /* Data memory barrier */ mov r0, #0 1: mov r1, r0, asl #29 mov r2, #0 2: orr r3, r1, r2, asl #5 mcr p15, 1, r3, c7, c15, 2 add r2, r2, #1 cmp r2, #L2_CACHE_SETS bne 2b add r0, r0, #1 cmp r0, #8 bne 1b mcr p15, 0, r0, c7, c10, 4 @ data write barrier CPWAIT(r0) mcr p15, 0, r0, c7, c10, 5 /* Data memory barrier */ RET END(xscalec3_l2cache_purge) ENTRY(xscalec3_l2cache_clean_rng) mcr p15, 0, r0, c7, c10, 5 /* Data memory barrier */ and r2, r0, #0x1f add r1, r1, r2 bic r0, r0, #0x1f 1: mcr p15, 1, r0, c7, c11, 1 /* Clean L2 D cache entry */ add r0, r0, #32 subs r1, r1, #32 bhi 1b CPWAIT(r0) mcr p15, 0, r0, c7, c10, 4 @ data write barrier mcr p15, 0, r0, c7, c10, 5 CPWAIT_AND_RETURN(r0) END(xscalec3_l2cache_clean_rng) ENTRY(xscalec3_l2cache_purge_rng) mcr p15, 0, r0, c7, c10, 5 /* Data memory barrier */ and r2, r0, #0x1f add r1, r1, r2 bic r0, r0, #0x1f 1: mcr p15, 1, r0, c7, c11, 1 /* Clean L2 D cache entry */ mcr p15, 1, r0, c7, c7, 1 /* Invalidate L2 D cache entry */ add r0, r0, #32 subs r1, r1, #32 bhi 1b mcr p15, 0, r0, c7, c10, 4 @ data 
write barrier mcr p15, 0, r0, c7, c10, 5 CPWAIT_AND_RETURN(r0) END(xscalec3_l2cache_purge_rng) ENTRY(xscalec3_l2cache_flush_rng) mcr p15, 0, r0, c7, c10, 5 /* Data memory barrier */ and r2, r0, #0x1f add r1, r1, r2 bic r0, r0, #0x1f 1: mcr p15, 1, r0, c7, c7, 1 /* Invalidate L2 cache line */ add r0, r0, #32 subs r1, r1, #32 bhi 1b mcr p15, 0, r0, c7, c10, 4 @ data write barrier mcr p15, 0, r0, c7, c10, 5 CPWAIT_AND_RETURN(r0) END(xscalec3_l2cache_flush_rng) /* * Functions to set the MMU Translation Table Base register * * We need to clean and flush the cache as it uses virtual * addresses that are about to change. */ ENTRY(xscalec3_setttb) #ifdef CACHE_CLEAN_BLOCK_INTR mrs r3, cpsr orr r1, r3, #(I32_bit | F32_bit) msr cpsr_fsxc, r1 #else ldr r3, .Lblock_userspace_access ldr r2, [r3] orr r1, r2, #1 str r1, [r3] #endif stmfd sp!, {r0-r3, lr} bl _C_LABEL(xscalec3_cache_cleanID) mcr p15, 0, r0, c7, c5, 0 /* invalidate I$ and BTB */ mcr p15, 0, r0, c7, c10, 4 /* drain write and fill buffer */ CPWAIT(r0) ldmfd sp!, {r0-r3, lr} #ifdef ARM_USE_L2_CACHE orr r0, r0, #0x18 /* cache the page table in L2 */ #endif /* Write the TTB */ mcr p15, 0, r0, c2, c0, 0 /* If we have updated the TTB we must flush the TLB */ mcr p15, 0, r0, c8, c7, 0 /* invalidate I+D TLB */ CPWAIT(r0) #ifdef CACHE_CLEAN_BLOCK_INTR msr cpsr_fsxc, r3 #else str r2, [r3] #endif RET END(xscalec3_setttb) /* * Context switch. * * These is the CPU-specific parts of the context switcher cpu_switch() * These functions actually perform the TTB reload. * * NOTE: Special calling convention * r1, r4-r13 must be preserved */ ENTRY(xscalec3_context_switch) /* * CF_CACHE_PURGE_ID will *ALWAYS* be called prior to this. * Thus the data cache will contain only kernel data and the * instruction cache will contain only kernel code, and all * kernel mappings are shared by all processes. 
*/ #ifdef ARM_USE_L2_CACHE orr r0, r0, #0x18 /* Cache the page table in L2 */ #endif /* Write the TTB */ mcr p15, 0, r0, c2, c0, 0 /* If we have updated the TTB we must flush the TLB */ mcr p15, 0, r0, c8, c7, 0 /* flush the I+D tlb */ CPWAIT_AND_RETURN(r0) END(xscalec3_context_switch) Index: stable/10/sys/arm/arm/exception.S =================================================================== --- stable/10/sys/arm/arm/exception.S (revision 269795) +++ stable/10/sys/arm/arm/exception.S (revision 269796) @@ -1,463 +1,463 @@ /* $NetBSD: exception.S,v 1.13 2003/10/31 16:30:15 scw Exp $ */ /*- * Copyright (c) 1994-1997 Mark Brinicombe. * Copyright (c) 1994 Brini. * All rights reserved. * * This code is derived from software written for Brini by Mark Brinicombe * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Brini. * 4. The name of the company nor the name of the author may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * RiscBSD kernel project * * exception.S * * Low level handlers for exception vectors * * Created : 24/09/94 * * Based on kate/display/abort.s * */ #include "assym.s" #include #include #include __FBSDID("$FreeBSD$"); .text .align 0 /* * ASM macros for pushing and pulling trapframes from the stack * * These macros are used to handle the irqframe and trapframe structures * defined above. */ /* * PUSHFRAME - macro to push a trap frame on the stack in the current mode * Since the current mode is used, the SVC lr field is not defined. 
* * NOTE: r13 and r14 are stored separately as a work around for the * SA110 rev 2 STM^ bug */ #ifdef ARM_TP_ADDRESS #define PUSHFRAME \ sub sp, sp, #4; /* Align the stack */ \ str lr, [sp, #-4]!; /* Push the return address */ \ sub sp, sp, #(4*17); /* Adjust the stack pointer */ \ stmia sp, {r0-r12}; /* Push the user mode registers */ \ add r0, sp, #(4*13); /* Adjust the stack pointer */ \ stmia r0, {r13-r14}^; /* Push the user mode registers */ \ mov r0, r0; /* NOP for previous instruction */ \ mrs r0, spsr; /* Put the SPSR on the stack */ \ str r0, [sp, #-4]!; \ ldr r0, =ARM_RAS_START; \ mov r1, #0; \ str r1, [r0]; \ mov r1, #0xffffffff; \ str r1, [r0, #4]; #else #define PUSHFRAME \ sub sp, sp, #4; /* Align the stack */ \ str lr, [sp, #-4]!; /* Push the return address */ \ sub sp, sp, #(4*17); /* Adjust the stack pointer */ \ stmia sp, {r0-r12}; /* Push the user mode registers */ \ add r0, sp, #(4*13); /* Adjust the stack pointer */ \ stmia r0, {r13-r14}^; /* Push the user mode registers */ \ mov r0, r0; /* NOP for previous instruction */ \ mrs r0, spsr; /* Put the SPSR on the stack */ \ str r0, [sp, #-4]!; #endif /* * PULLFRAME - macro to pull a trap frame from the stack in the current mode * Since the current mode is used, the SVC lr field is ignored. 
*/ #ifdef ARM_TP_ADDRESS #define PULLFRAME \ ldr r0, [sp], #4; /* Get the SPSR from stack */ \ msr spsr_fsxc, r0; \ ldmia sp, {r0-r14}^; /* Restore registers (usr mode) */ \ mov r0, r0; /* NOP for previous instruction */ \ add sp, sp, #(4*17); /* Adjust the stack pointer */ \ ldr lr, [sp], #4; /* Pull the return address */ \ add sp, sp, #4 /* Align the stack */ #else #define PULLFRAME \ ldr r0, [sp], #4 ; /* Get the SPSR from stack */ \ msr spsr_fsxc, r0; \ clrex; \ ldmia sp, {r0-r14}^; /* Restore registers (usr mode) */ \ mov r0, r0; /* NOP for previous instruction */ \ add sp, sp, #(4*17); /* Adjust the stack pointer */ \ ldr lr, [sp], #4; /* Pull the return address */ \ add sp, sp, #4 /* Align the stack */ #endif /* * PUSHFRAMEINSVC - macro to push a trap frame on the stack in SVC32 mode * This should only be used if the processor is not currently in SVC32 * mode. The processor mode is switched to SVC mode and the trap frame is * stored. The SVC lr field is used to store the previous value of * lr in SVC mode. 
* * NOTE: r13 and r14 are stored separately as a work around for the * SA110 rev 2 STM^ bug */ #ifdef ARM_TP_ADDRESS #define PUSHFRAMEINSVC \ stmdb sp, {r0-r3}; /* Save 4 registers */ \ mov r0, lr; /* Save xxx32 r14 */ \ mov r1, sp; /* Save xxx32 sp */ \ mrs r3, spsr; /* Save xxx32 spsr */ \ mrs r2, cpsr; /* Get the CPSR */ \ bic r2, r2, #(PSR_MODE); /* Fix for SVC mode */ \ orr r2, r2, #(PSR_SVC32_MODE); \ msr cpsr_c, r2; /* Punch into SVC mode */ \ mov r2, sp; /* Save SVC sp */ \ bic sp, sp, #7; /* Align sp to an 8-byte addrress */ \ sub sp, sp, #4; /* Pad trapframe to keep alignment */ \ str r0, [sp, #-4]!; /* Push return address */ \ str lr, [sp, #-4]!; /* Push SVC lr */ \ str r2, [sp, #-4]!; /* Push SVC sp */ \ msr spsr_fsxc, r3; /* Restore correct spsr */ \ ldmdb r1, {r0-r3}; /* Restore 4 regs from xxx mode */ \ sub sp, sp, #(4*15); /* Adjust the stack pointer */ \ stmia sp, {r0-r12}; /* Push the user mode registers */ \ add r0, sp, #(4*13); /* Adjust the stack pointer */ \ stmia r0, {r13-r14}^; /* Push the user mode registers */ \ mov r0, r0; /* NOP for previous instruction */ \ ldr r5, =ARM_RAS_START; /* Check if there's any RAS */ \ ldr r4, [r5, #4]; /* reset it to point at the */ \ cmp r4, #0xffffffff; /* end of memory if necessary; */ \ movne r1, #0xffffffff; /* leave value in r4 for later */ \ strne r1, [r5, #4]; /* comparision against PC. */ \ ldr r3, [r5]; /* Retrieve global RAS_START */ \ cmp r3, #0; /* and reset it if non-zero. */ \ movne r1, #0; /* If non-zero RAS_START and */ \ strne r1, [r5]; /* PC was lower than RAS_END, */ \ ldrne r1, [r0, #16]; /* adjust the saved PC so that */ \ cmpne r4, r1; /* execution later resumes at */ \ strhi r3, [r0, #16]; /* the RAS_START location. */ \ mrs r0, spsr; \ str r0, [sp, #-4]! 
#else #define PUSHFRAMEINSVC \ stmdb sp, {r0-r3}; /* Save 4 registers */ \ mov r0, lr; /* Save xxx32 r14 */ \ mov r1, sp; /* Save xxx32 sp */ \ mrs r3, spsr; /* Save xxx32 spsr */ \ mrs r2, cpsr; /* Get the CPSR */ \ bic r2, r2, #(PSR_MODE); /* Fix for SVC mode */ \ orr r2, r2, #(PSR_SVC32_MODE); \ msr cpsr_c, r2; /* Punch into SVC mode */ \ mov r2, sp; /* Save SVC sp */ \ bic sp, sp, #7; /* Align sp to an 8-byte addrress */ \ sub sp, sp, #4; /* Pad trapframe to keep alignment */ \ str r0, [sp, #-4]!; /* Push return address */ \ str lr, [sp, #-4]!; /* Push SVC lr */ \ str r2, [sp, #-4]!; /* Push SVC sp */ \ msr spsr_fsxc, r3; /* Restore correct spsr */ \ ldmdb r1, {r0-r3}; /* Restore 4 regs from xxx mode */ \ sub sp, sp, #(4*15); /* Adjust the stack pointer */ \ stmia sp, {r0-r12}; /* Push the user mode registers */ \ add r0, sp, #(4*13); /* Adjust the stack pointer */ \ stmia r0, {r13-r14}^; /* Push the user mode registers */ \ mov r0, r0; /* NOP for previous instruction */ \ mrs r0, spsr; /* Put the SPSR on the stack */ \ str r0, [sp, #-4]! #endif /* * PULLFRAMEFROMSVCANDEXIT - macro to pull a trap frame from the stack * in SVC32 mode and restore the saved processor mode and PC. * This should be used when the SVC lr register needs to be restored on * exit. 
*/ #ifdef ARM_TP_ADDRESS #define PULLFRAMEFROMSVCANDEXIT \ ldr r0, [sp], #4; /* Get the SPSR from stack */ \ msr spsr_fsxc, r0; /* restore SPSR */ \ ldmia sp, {r0-r14}^; /* Restore registers (usr mode) */ \ mov r0, r0; /* NOP for previous instruction */ \ add sp, sp, #(4*15); /* Adjust the stack pointer */ \ ldmia sp, {sp, lr, pc}^ /* Restore lr and exit */ #else #define PULLFRAMEFROMSVCANDEXIT \ ldr r0, [sp], #4; /* Get the SPSR from stack */ \ msr spsr_fsxc, r0; /* restore SPSR */ \ clrex; \ ldmia sp, {r0-r14}^; /* Restore registers (usr mode) */ \ mov r0, r0; /* NOP for previous instruction */ \ add sp, sp, #(4*15); /* Adjust the stack pointer */ \ ldmia sp, {sp, lr, pc}^ /* Restore lr and exit */ #endif #if defined(__ARM_EABI__) /* * Unwind hints so we can unwind past functions that use * PULLFRAMEFROMSVCANDEXIT. They are run in reverse order. * As the last thing we do is restore the stack pointer * we can ignore the padding at the end of struct trapframe. */ #define UNWINDSVCFRAME \ .save {r13-r15}; /* Restore sp, lr, pc */ \ .pad #(2*4); /* Skip user sp and lr */ \ .save {r0-r12}; /* Restore r0-r12 */ \ .pad #(4) /* Skip spsr */ #else #define UNWINDSVCFRAME #endif #define DO_AST \ ldr r0, [sp] /* Get the SPSR from stack */ ;\ mrs r4, cpsr /* save CPSR */ ;\ orr r1, r4, #(I32_bit|F32_bit) ;\ msr cpsr_c, r1 /* Disable interrupts */ ;\ and r0, r0, #(PSR_MODE) /* Returning to USR mode? */ ;\ teq r0, #(PSR_USR32_MODE) ;\ bne 2f /* Nope, get out now */ ;\ bic r4, r4, #(I32_bit|F32_bit) ;\ 1: GET_CURTHREAD_PTR(r5) ;\ ldr r1, [r5, #(TD_FLAGS)] ;\ and r1, r1, #(TDF_ASTPENDING|TDF_NEEDRESCHED) ;\ teq r1, #0x00000000 ;\ beq 2f /* Nope. Just bail */ ;\ msr cpsr_c, r4 /* Restore interrupts */ ;\ mov r0, sp ;\ bl _C_LABEL(ast) /* ast(frame) */ ;\ orr r0, r4, #(I32_bit|F32_bit) ;\ msr cpsr_c, r0 ;\ b 1b ;\ 2: /* * Entry point for a Software Interrupt (SWI). 
* * The hardware switches to svc32 mode on a swi, so we're already on the * right stack; just build a trapframe and call the handler. */ ASENTRY_NP(swi_entry) PUSHFRAME /* Build the trapframe on the */ mov r0, sp /* scv32 stack, pass it to the */ bl _C_LABEL(swi_handler) /* swi handler. */ /* * The fork_trampoline() code in swtch.S aranges for the MI fork_exit() * to return to swi_exit here, to return to userland. The net effect is * that a newly created thread appears to return from a SWI just like * the parent thread that created it. */ -ASENTRY_NP(swi_exit) +ASEENTRY_NP(swi_exit) DO_AST /* Handle pending signals. */ PULLFRAME /* Deallocate trapframe. */ movs pc, lr /* Return to userland. */ STOP_UNWINDING /* Don't unwind into user mode. */ -END(swi_exit) +EEND(swi_exit) END(swi_entry) /* * Standard exception exit handler. * * This is used to return from all exceptions except SWI. It uses DO_AST and * PULLFRAMEFROMSVCANDEXIT and can only be called if the exception entry code * used PUSHFRAMEINSVC. * * If the return is to user mode, this uses DO_AST to deliver any pending * signals and/or handle TDF_NEEDRESCHED first. */ ASENTRY_NP(exception_exit) DO_AST /* Handle pending signals. */ PULLFRAMEFROMSVCANDEXIT /* Return. */ UNWINDSVCFRAME /* Special unwinding for exceptions. */ END(exception_exit) /* * Entry point for a Prefetch Abort exception. * * The hardware switches to the abort mode stack; we switch to svc32 before * calling the handler, then return directly to the original mode/stack * on exit (without transitioning back through the abort mode stack). */ ASENTRY_NP(prefetch_abort_entry) #ifdef __XSCALE__ nop /* Make absolutely sure any pending */ nop /* imprecise aborts have occurred. */ #endif sub lr, lr, #4 /* Adjust the lr. Transition to scv32 */ PUSHFRAMEINSVC /* mode stack, build trapframe there. */ adr lr, exception_exit /* Return from handler via standard */ mov r0, sp /* exception exit routine. 
Pass the */ b prefetch_abort_handler /* trapframe to the handler. */ END(prefetch_abort_entry) /* * Entry point for a Data Abort exception. * * The hardware switches to the abort mode stack; we switch to svc32 before * calling the handler, then return directly to the original mode/stack * on exit (without transitioning back through the abort mode stack). */ ASENTRY_NP(data_abort_entry) #ifdef __XSCALE__ nop /* Make absolutely sure any pending */ nop /* imprecise aborts have occurred. */ #endif sub lr, lr, #8 /* Adjust the lr. Transition to scv32 */ PUSHFRAMEINSVC /* mode stack, build trapframe there. */ adr lr, exception_exit /* Return from handler via standard */ mov r0, sp /* exception exit routine. Pass the */ b data_abort_handler /* trapframe to the handler. */ END(data_abort_entry) /* * Entry point for an Undefined Instruction exception. * * The hardware switches to the undefined mode stack; we switch to svc32 before * calling the handler, then return directly to the original mode/stack * on exit (without transitioning back through the undefined mode stack). */ ASENTRY_NP(undefined_entry) sub lr, lr, #4 /* Adjust the lr. Transition to scv32 */ PUSHFRAMEINSVC /* mode stack, build trapframe there. */ adr lr, exception_exit /* Return from handler via standard */ mov r0, sp /* exception exit routine. Pass the */ b undefinedinstruction /* trapframe to the handler. */ END(undefined_entry) /* * Entry point for a normal IRQ. * * The hardware switches to the IRQ mode stack; we switch to svc32 before * calling the handler, then return directly to the original mode/stack * on exit (without transitioning back through the IRQ mode stack). */ ASENTRY_NP(irq_entry) sub lr, lr, #4 /* Adjust the lr. Transition to scv32 */ PUSHFRAMEINSVC /* mode stack, build trapframe there. */ adr lr, exception_exit /* Return from handler via standard */ mov r0, sp /* exception exit routine. Pass the */ b _C_LABEL(arm_irq_handler)/* trapframe to the handler. 
*/ END(irq_entry) /* * Entry point for an FIQ interrupt. * * We don't currently support FIQ handlers very much. Something can * install itself in the FIQ vector using code (that may or may not work * these days) in fiq.c. If nobody does that and an FIQ happens, this * default handler just disables FIQs and otherwise ignores it. */ ASENTRY_NP(fiq_entry) mrs r8, cpsr /* FIQ handling isn't supported, */ bic r8, #(F32_bit) /* just disable FIQ and return. */ msr cpsr_c, r8 /* The r8 we trash here is the */ subs pc, lr, #4 /* banked FIQ-mode r8. */ END(fiq_entry) /* * Entry point for an Address Exception exception. * This is an arm26 exception that should never happen. */ ASENTRY_NP(addr_exception_entry) mov r3, lr mrs r2, spsr mrs r1, cpsr adr r0, Laddr_exception_msg b _C_LABEL(panic) Laddr_exception_msg: .asciz "Address Exception CPSR=0x%08x SPSR=0x%08x LR=0x%08x\n" .balign 4 END(addr_exception_entry) /* * Entry point for the system Reset vector. * This should never happen, so panic. */ ASENTRY_NP(reset_entry) mov r1, lr adr r0, Lreset_panicmsg b _C_LABEL(panic) /* NOTREACHED */ Lreset_panicmsg: .asciz "Reset vector called, LR = 0x%08x" .balign 4 END(reset_entry) /* * page0 and page0_data -- An image of the ARM vectors which is copied to * the ARM vectors page (high or low) as part of CPU initialization. The * code that does the copy assumes that page0_data holds one 32-bit word * of data for each of the predefined ARM vectors. It also assumes that * page0_data follows the vectors in page0, but other stuff can appear * between the two. We currently leave room between the two for some fiq * handler code to be copied in. 
*/ .global _C_LABEL(page0), _C_LABEL(page0_data) _C_LABEL(page0): ldr pc, .Lreset_entry ldr pc, .Lundefined_entry ldr pc, .Lswi_entry ldr pc, .Lprefetch_abort_entry ldr pc, .Ldata_abort_entry ldr pc, .Laddr_exception_entry ldr pc, .Lirq_entry .fiqv: ldr pc, .Lfiq_entry .space 256 /* room for some fiq handler code */ _C_LABEL(page0_data): .Lreset_entry: .word reset_entry .Lundefined_entry: .word undefined_entry .Lswi_entry: .word swi_entry .Lprefetch_abort_entry: .word prefetch_abort_entry .Ldata_abort_entry: .word data_abort_entry .Laddr_exception_entry: .word addr_exception_entry .Lirq_entry: .word irq_entry .Lfiq_entry: .word fiq_entry /* * These items are used by the code in fiq.c to install what it calls the * "null" handler. It's actually our default vector entry that just jumps * to the default handler which just disables FIQs and returns. */ .global _C_LABEL(fiq_nullhandler_code), _C_LABEL(fiq_nullhandler_size) _C_LABEL(fiq_nullhandler_code): .word .fiqv _C_LABEL(fiq_nullhandler_size): .word 4 Index: stable/10/sys/arm/arm/fusu.S =================================================================== --- stable/10/sys/arm/arm/fusu.S (revision 269795) +++ stable/10/sys/arm/arm/fusu.S (revision 269796) @@ -1,393 +1,392 @@ /* $NetBSD: fusu.S,v 1.10 2003/12/01 13:34:44 rearnsha Exp $ */ /*- * Copyright (c) 1996-1998 Mark Brinicombe. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Mark Brinicombe * 4. The name of the company nor the name of the author may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include #include #include "assym.s" __FBSDID("$FreeBSD$"); #ifdef _ARM_ARCH_6 #define GET_PCB(tmp) \ mrc p15, 0, tmp, c13, c0, 4; \ add tmp, tmp, #(TD_PCB) #else .Lcurpcb: .word _C_LABEL(__pcpu) + PC_CURPCB #define GET_PCB(tmp) \ ldr tmp, .Lcurpcb #endif /* * fuword(caddr_t uaddr); * Fetch an int from the user's address space. 
*/ -ENTRY_NP(casuword32) ENTRY(casuword) +EENTRY_NP(casuword32) GET_PCB(r3) ldr r3, [r3] #ifdef DIAGNOSTIC teq r3, #0x00000000 beq .Lfusupcbfault #endif stmfd sp!, {r4, r5} adr r4, .Lcasuwordfault str r4, [r3, #PCB_ONFAULT] #ifdef _ARM_ARCH_6 1: cmp r0, #KERNBASE mvnhs r0, #0 bhs 2f ldrex r5, [r0] cmp r5, r1 movne r0, r5 bne 2f strex r5, r2, [r0] cmp r5, #0 bne 1b #else ldrt r5, [r0] cmp r5, r1 movne r0, r5 streqt r2, [r0] #endif moveq r0, r1 2: ldmfd sp!, {r4, r5} mov r1, #0x00000000 str r1, [r3, #PCB_ONFAULT] RET -END(casuword32) +EEND(casuword32) END(casuword) /* * Handle faults from casuword. Clean up and return -1. */ .Lcasuwordfault: mov r0, #0x00000000 str r0, [r3, #PCB_ONFAULT] mvn r0, #0x00000000 ldmfd sp!, {r4, r5} RET /* * fuword(caddr_t uaddr); * Fetch an int from the user's address space. */ -ENTRY_NP(fuword32) ENTRY(fuword) +EENTRY_NP(fuword32) GET_PCB(r2) ldr r2, [r2] #ifdef DIAGNOSTIC teq r2, #0x00000000 beq .Lfusupcbfault #endif adr r1, .Lfusufault str r1, [r2, #PCB_ONFAULT] ldrt r3, [r0] mov r1, #0x00000000 str r1, [r2, #PCB_ONFAULT] mov r0, r3 RET END(fuword32) END(fuword) /* * fusword(caddr_t uaddr); * Fetch a short from the user's address space. */ ENTRY(fusword) GET_PCB(r2) ldr r2, [r2] #ifdef DIAGNOSTIC teq r2, #0x00000000 beq .Lfusupcbfault #endif adr r1, .Lfusufault str r1, [r2, #PCB_ONFAULT] ldrbt r3, [r0], #1 ldrbt ip, [r0] #ifdef __ARMEB__ orr r0, ip, r3, asl #8 #else orr r0, r3, ip, asl #8 #endif mov r1, #0x00000000 str r1, [r2, #PCB_ONFAULT] RET END(fusword) /* * fuswintr(caddr_t uaddr); * Fetch a short from the user's address space. Can be called during an * interrupt. 
*/ ENTRY(fuswintr) ldr r2, Lblock_userspace_access ldr r2, [r2] teq r2, #0 mvnne r0, #0x00000000 RETne GET_PCB(r2) ldr r2, [r2] #ifdef DIAGNOSTIC teq r2, #0x00000000 beq .Lfusupcbfault #endif adr r1, _C_LABEL(fusubailout) str r1, [r2, #PCB_ONFAULT] ldrbt r3, [r0], #1 ldrbt ip, [r0] #ifdef __ARMEB__ orr r0, ip, r3, asl #8 #else orr r0, r3, ip, asl #8 #endif mov r1, #0x00000000 str r1, [r2, #PCB_ONFAULT] RET END(fuswintr) Lblock_userspace_access: .word _C_LABEL(block_userspace_access) .data .align 0 .global _C_LABEL(block_userspace_access) _C_LABEL(block_userspace_access): .word 0 .text /* * fubyte(caddr_t uaddr); * Fetch a byte from the user's address space. */ ENTRY(fubyte) GET_PCB(r2) ldr r2, [r2] #ifdef DIAGNOSTIC teq r2, #0x00000000 beq .Lfusupcbfault #endif adr r1, .Lfusufault str r1, [r2, #PCB_ONFAULT] ldrbt r3, [r0] mov r1, #0x00000000 str r1, [r2, #PCB_ONFAULT] mov r0, r3 RET END(fubyte) /* * Handle faults from [fs]u*(). Clean up and return -1. */ .Lfusufault: mov r0, #0x00000000 str r0, [r2, #PCB_ONFAULT] mvn r0, #0x00000000 RET /* * Handle faults from [fs]u*(). Clean up and return -1. This differs from * fusufault() in that trap() will recognise it and return immediately rather * than trying to page fault. */ /* label must be global as fault.c references it */ .global _C_LABEL(fusubailout) _C_LABEL(fusubailout): mov r0, #0x00000000 str r0, [r2, #PCB_ONFAULT] mvn r0, #0x00000000 RET #ifdef DIAGNOSTIC /* * Handle earlier faults from [fs]u*(), due to no pcb */ .Lfusupcbfault: mov r1, r0 adr r0, fusupcbfaulttext b _C_LABEL(panic) fusupcbfaulttext: .asciz "Yikes - no valid PCB during fusuxxx() addr=%08x\n" .align 0 #endif /* * suword(caddr_t uaddr, int x); * Store an int in the user's address space. 
*/ -ENTRY_NP(suword32) ENTRY(suword) +EENTRY_NP(suword32) GET_PCB(r2) ldr r2, [r2] #ifdef DIAGNOSTIC teq r2, #0x00000000 beq .Lfusupcbfault #endif adr r3, .Lfusufault str r3, [r2, #PCB_ONFAULT] strt r1, [r0] mov r0, #0x00000000 str r0, [r2, #PCB_ONFAULT] RET END(suword32) END(suword) /* * suswintr(caddr_t uaddr, short x); * Store a short in the user's address space. Can be called during an * interrupt. */ ENTRY(suswintr) ldr r2, Lblock_userspace_access ldr r2, [r2] teq r2, #0 mvnne r0, #0x00000000 RETne GET_PCB(r2) ldr r2, [r2] #ifdef DIAGNOSTIC teq r2, #0x00000000 beq .Lfusupcbfault #endif adr r3, _C_LABEL(fusubailout) str r3, [r2, #PCB_ONFAULT] #ifdef __ARMEB__ mov ip, r1, lsr #8 strbt ip, [r0], #1 #else strbt r1, [r0], #1 mov r1, r1, lsr #8 #endif strbt r1, [r0] mov r0, #0x00000000 str r0, [r2, #PCB_ONFAULT] RET END(suswintr) /* * susword(caddr_t uaddr, short x); * Store a short in the user's address space. */ ENTRY(susword) GET_PCB(r2) ldr r2, [r2] #ifdef DIAGNOSTIC teq r2, #0x00000000 beq .Lfusupcbfault #endif adr r3, .Lfusufault str r3, [r2, #PCB_ONFAULT] #ifdef __ARMEB__ mov ip, r1, lsr #8 strbt ip, [r0], #1 #else strbt r1, [r0], #1 mov r1, r1, lsr #8 #endif strbt r1, [r0] mov r0, #0x00000000 str r0, [r2, #PCB_ONFAULT] RET END(susword) /* * subyte(caddr_t uaddr, char x); * Store a byte in the user's address space. */ ENTRY(subyte) GET_PCB(r2) ldr r2, [r2] #ifdef DIAGNOSTIC teq r2, #0x00000000 beq .Lfusupcbfault #endif adr r3, .Lfusufault str r3, [r2, #PCB_ONFAULT] strbt r1, [r0] mov r0, #0x00000000 str r0, [r2, #PCB_ONFAULT] RET END(subyte) - Index: stable/10/sys/arm/arm/locore.S =================================================================== --- stable/10/sys/arm/arm/locore.S (revision 269795) +++ stable/10/sys/arm/arm/locore.S (revision 269796) @@ -1,540 +1,540 @@ /* $NetBSD: locore.S,v 1.14 2003/04/20 16:21:40 thorpej Exp $ */ /*- * Copyright 2011 Semihalf * Copyright (C) 1994-1997 Mark Brinicombe * Copyright (C) 1994 Brini * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Brini. * 4. The name of Brini may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL BRINI BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ #include "assym.s" #include #include #include #include __FBSDID("$FreeBSD$"); /* What size should this really be ? 
It is only used by initarm() */ #define INIT_ARM_STACK_SIZE (2048 * 4) #define CPWAIT_BRANCH \ sub pc, pc, #4 #define CPWAIT(tmp) \ mrc p15, 0, tmp, c2, c0, 0 /* arbitrary read of CP15 */ ;\ mov tmp, tmp /* wait for it to complete */ ;\ CPWAIT_BRANCH /* branch to next insn */ /* * This is for kvm_mkdb, and should be the address of the beginning * of the kernel text segment (not necessarily the same as kernbase). */ .text .align 0 .globl kernbase .set kernbase,KERNBASE .globl physaddr .set physaddr,PHYSADDR /* * On entry for FreeBSD boot ABI: * r0 - metadata pointer or 0 (boothowto on AT91's boot2) * r1 - if (r0 == 0) then metadata pointer * On entry for Linux boot ABI: * r0 - 0 * r1 - machine type (passed as arg2 to initarm) * r2 - Pointer to a tagged list or dtb image (phys addr) (passed as arg1 initarm) * * For both types of boot we gather up the args, put them in a struct arm_boot_params * structure and pass that to initarm. */ -ENTRY_NP(btext) + .globl btext +btext: ASENTRY_NP(_start) STOP_UNWINDING /* Can't unwind into the bootloader! */ mov r9, r0 /* 0 or boot mode from boot2 */ mov r8, r1 /* Save Machine type */ mov ip, r2 /* Save meta data */ mov fp, r3 /* Future expantion */ /* Make sure interrupts are disabled. */ mrs r7, cpsr orr r7, r7, #(I32_bit|F32_bit) msr cpsr_c, r7 #if defined (FLASHADDR) && defined(LOADERRAMADDR) /* Check if we're running from flash. */ ldr r7, =FLASHADDR /* * If we're running with MMU disabled, test against the * physical address instead. 
*/ mrc p15, 0, r2, c1, c0, 0 ands r2, r2, #CPU_CONTROL_MMU_ENABLE ldreq r6, =PHYSADDR ldrne r6, =LOADERRAMADDR cmp r7, r6 bls flash_lower cmp r7, pc bhi from_ram b do_copy flash_lower: cmp r6, pc bls from_ram do_copy: ldr r7, =KERNBASE adr r1, _start ldr r0, Lreal_start ldr r2, Lend sub r2, r2, r0 sub r0, r0, r7 add r0, r0, r6 mov r4, r0 bl memcpy ldr r0, Lram_offset add pc, r4, r0 Lram_offset: .word from_ram-_C_LABEL(_start) from_ram: nop #endif adr r7, Lunmapped bic r7, r7, #0xf0000000 orr r7, r7, #PHYSADDR disable_mmu: /* Disable MMU for a while */ mrc p15, 0, r2, c1, c0, 0 bic r2, r2, #(CPU_CONTROL_MMU_ENABLE | CPU_CONTROL_DC_ENABLE |\ CPU_CONTROL_WBUF_ENABLE) bic r2, r2, #(CPU_CONTROL_IC_ENABLE) bic r2, r2, #(CPU_CONTROL_BPRD_ENABLE) mcr p15, 0, r2, c1, c0, 0 nop nop nop mov pc, r7 Lunmapped: /* * Build page table from scratch. */ /* Find the delta between VA and PA */ adr r0, Lpagetable ldr r1, [r0] sub r2, r1, r0 /* At this point: r2 = VA - PA */ /* * Find the physical address of the table. 
After these two * instructions: * r1 = va(pagetable) * * r0 = va(pagetable) - (VA - PA) * = va(pagetable) - VA + PA * = pa(pagetable) */ ldr r1, [r0, #4] sub r0, r1, r2 /* * Map PA == VA */ /* Find the start kernels load address */ adr r5, _start ldr r2, =(L1_S_OFFSET) bic r5, r2 mov r1, r5 mov r2, r5 /* Map 64MiB, preserved over calls to build_pagetables */ mov r3, #64 bl build_pagetables /* Create the kernel map to jump to */ mov r1, r5 ldr r2, =(KERNVIRTADDR) bl build_pagetables #if defined(SOCDEV_PA) && defined(SOCDEV_VA) /* Create the custom map */ ldr r1, =SOCDEV_PA ldr r2, =SOCDEV_VA bl build_pagetables #endif #if defined(SMP) orr r0, r0, #2 /* Set TTB shared memory flag */ #endif mcr p15, 0, r0, c2, c0, 0 /* Set TTB */ mcr p15, 0, r0, c8, c7, 0 /* Flush TLB */ #if defined(CPU_ARM1136) || defined(CPU_ARM1176) || defined(CPU_CORTEXA) || defined(CPU_MV_PJ4B) || defined(CPU_KRAIT) mov r0, #0 mcr p15, 0, r0, c13, c0, 1 /* Set ASID to 0 */ #endif /* Set the Domain Access register. Very important! */ mov r0, #((DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL*2)) | DOMAIN_CLIENT) mcr p15, 0, r0, c3, c0, 0 /* * Enable MMU. * On armv6 enable extended page tables, and set alignment checking * to modulo-4 (CPU_CONTROL_UNAL_ENABLE) for the ldrd/strd * instructions emitted by clang. 
*/ mrc p15, 0, r0, c1, c0, 0 #ifdef _ARM_ARCH_6 orr r0, r0, #(CPU_CONTROL_V6_EXTPAGE | CPU_CONTROL_UNAL_ENABLE) orr r0, r0, #(CPU_CONTROL_AFLT_ENABLE) orr r0, r0, #(CPU_CONTROL_AF_ENABLE) #endif orr r0, r0, #(CPU_CONTROL_MMU_ENABLE) mcr p15, 0, r0, c1, c0, 0 nop nop nop CPWAIT(r0) mmu_done: nop adr r1, .Lstart ldmia r1, {r1, r2, sp} /* Set initial stack and */ sub r2, r2, r1 /* get zero init data */ mov r3, #0 .L1: str r3, [r1], #0x0004 /* get zero init data */ subs r2, r2, #4 bgt .L1 ldr pc, .Lvirt_done virt_done: mov r1, #28 /* loader info size is 28 bytes also second arg */ subs sp, sp, r1 /* allocate arm_boot_params struct on stack */ mov r0, sp /* loader info pointer is first arg */ bic sp, sp, #7 /* align stack to 8 bytes */ str r1, [r0] /* Store length of loader info */ str r9, [r0, #4] /* Store r0 from boot loader */ str r8, [r0, #8] /* Store r1 from boot loader */ str ip, [r0, #12] /* store r2 from boot loader */ str fp, [r0, #16] /* store r3 from boot loader */ str r5, [r0, #20] /* store the physical address */ adr r4, Lpagetable /* load the pagetable address */ ldr r5, [r4, #4] str r5, [r0, #24] /* store the pagetable address */ mov fp, #0 /* trace back starts here */ bl _C_LABEL(initarm) /* Off we go */ /* init arm will return the new stack pointer. */ mov sp, r0 bl _C_LABEL(mi_startup) /* call mi_startup()! 
*/ adr r0, .Lmainreturned b _C_LABEL(panic) /* NOTREACHED */ -END(btext) END(_start) /* * Builds the page table * r0 - The table base address * r1 - The physical address (trashed) * r2 - The virtual address (trashed) * r3 - The number of 1MiB sections * r4 - Trashed * * Addresses must be 1MiB aligned */ build_pagetables: /* Set the required page attributed */ ldr r4, =(L1_TYPE_S|L1_S_C|L1_S_AP(AP_KRW)) #if defined(SMP) orr r4, #(L1_SHARED) #endif orr r1, r4 /* Move the virtual address to the correct bit location */ lsr r2, #(L1_S_SHIFT - 2) mov r4, r3 1: str r1, [r0, r2] add r2, r2, #4 add r1, r1, #(L1_S_SIZE) adds r4, r4, #-1 bhi 1b RET Lpagetable: .word . .word pagetable Lvirtaddr: .word KERNVIRTADDR Lphysaddr: .word KERNPHYSADDR Lreal_start: .word _start Lend: .word _edata .Lstart: .word _edata .word _ebss .word svcstk + INIT_ARM_STACK_SIZE .Lvirt_done: .word virt_done .Lmainreturned: .asciz "main() returned" .align 0 .bss svcstk: .space INIT_ARM_STACK_SIZE /* * Memory for the initial pagetable. We are unable to place this in * the bss as this will be cleared after the table is loaded. */ .section ".init_pagetable" .align 14 /* 16KiB aligned */ pagetable: .space L1_TABLE_SIZE .text .align 0 .Lcpufuncs: .word _C_LABEL(cpufuncs) #if defined(SMP) .Lmpvirt_done: .word mpvirt_done Lstartup_pagetable_secondary: .word temp_pagetable ASENTRY_NP(mpentry) /* Make sure interrupts are disabled. */ mrs r7, cpsr orr r7, r7, #(I32_bit|F32_bit) msr cpsr_c, r7 /* Disable MMU. It should be disabled already, but make sure. 
*/ mrc p15, 0, r2, c1, c0, 0 bic r2, r2, #(CPU_CONTROL_MMU_ENABLE | CPU_CONTROL_DC_ENABLE |\ CPU_CONTROL_WBUF_ENABLE) bic r2, r2, #(CPU_CONTROL_IC_ENABLE) bic r2, r2, #(CPU_CONTROL_BPRD_ENABLE) mcr p15, 0, r2, c1, c0, 0 nop nop nop CPWAIT(r0) #if defined(ARM_MMU_V6) bl armv6_idcache_inv_all /* Modifies r0 only */ #elif defined(ARM_MMU_V7) bl armv7_idcache_inv_all /* Modifies r0-r3, ip */ #endif ldr r0, Lstartup_pagetable_secondary bic r0, r0, #0xf0000000 orr r0, r0, #PHYSADDR ldr r0, [r0] orr r0, r0, #2 /* Set TTB shared memory flag */ mcr p15, 0, r0, c2, c0, 0 /* Set TTB */ mcr p15, 0, r0, c8, c7, 0 /* Flush TLB */ mov r0, #0 mcr p15, 0, r0, c13, c0, 1 /* Set ASID to 0 */ /* Set the Domain Access register. Very important! */ mov r0, #((DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL*2)) | DOMAIN_CLIENT) mcr p15, 0, r0, c3, c0, 0 /* Enable MMU */ mrc p15, 0, r0, c1, c0, 0 orr r0, r0, #CPU_CONTROL_V6_EXTPAGE orr r0, r0, #CPU_CONTROL_AF_ENABLE orr r0, r0, #(CPU_CONTROL_MMU_ENABLE | CPU_CONTROL_DC_ENABLE |\ CPU_CONTROL_WBUF_ENABLE) orr r0, r0, #(CPU_CONTROL_IC_ENABLE) orr r0, r0, #(CPU_CONTROL_BPRD_ENABLE) mcr p15, 0, r0, c1, c0, 0 nop nop nop CPWAIT(r0) adr r1, .Lstart ldmia r1, {r1, r2, sp} /* Set initial stack and */ mrc p15, 0, r0, c0, c0, 5 and r0, r0, #15 mov r1, #2048 mul r2, r1, r0 sub sp, sp, r2 str r1, [sp] ldr pc, .Lmpvirt_done mpvirt_done: mov fp, #0 /* trace back starts here */ bl _C_LABEL(init_secondary) /* Off we go */ adr r0, .Lmpreturned b _C_LABEL(panic) /* NOTREACHED */ .Lmpreturned: .asciz "init_secondary() returned" .align 0 END(mpentry) #endif ENTRY_NP(cpu_halt) mrs r2, cpsr bic r2, r2, #(PSR_MODE) orr r2, r2, #(PSR_SVC32_MODE) orr r2, r2, #(I32_bit | F32_bit) msr cpsr_fsxc, r2 ldr r4, .Lcpu_reset_address ldr r4, [r4] ldr r0, .Lcpufuncs mov lr, pc ldr pc, [r0, #CF_IDCACHE_WBINV_ALL] mov lr, pc ldr pc, [r0, #CF_L2CACHE_WBINV_ALL] /* * Load the cpu_reset_needs_v4_MMU_disable flag to determine if it's * necessary. 
*/ ldr r1, .Lcpu_reset_needs_v4_MMU_disable ldr r1, [r1] cmp r1, #0 mov r2, #0 /* * MMU & IDC off, 32 bit program & data space * Hurl ourselves into the ROM */ mov r0, #(CPU_CONTROL_32BP_ENABLE | CPU_CONTROL_32BD_ENABLE) mcr 15, 0, r0, c1, c0, 0 mcrne 15, 0, r2, c8, c7, 0 /* nail I+D TLB on ARMv4 and greater */ mov pc, r4 /* * _cpu_reset_address contains the address to branch to, to complete * the cpu reset after turning the MMU off * This variable is provided by the hardware specific code */ .Lcpu_reset_address: .word _C_LABEL(cpu_reset_address) /* * cpu_reset_needs_v4_MMU_disable contains a flag that signals if the * v4 MMU disable instruction needs executing... it is an illegal instruction * on f.e. ARM6/7 that locks up the computer in an endless illegal * instruction / data-abort / reset loop. */ .Lcpu_reset_needs_v4_MMU_disable: .word _C_LABEL(cpu_reset_needs_v4_MMU_disable) END(cpu_halt) /* * setjump + longjmp */ ENTRY(setjmp) stmia r0, {r4-r14} mov r0, #0x00000000 RET END(setjmp) ENTRY(longjmp) ldmia r0, {r4-r14} mov r0, #0x00000001 RET END(longjmp) .data .global _C_LABEL(esym) _C_LABEL(esym): .word _C_LABEL(end) ENTRY_NP(abort) b _C_LABEL(abort) END(abort) ENTRY_NP(sigcode) mov r0, sp add r0, r0, #SIGF_UC /* * Call the sigreturn system call. * * We have to load r7 manually rather than using * "ldr r7, =SYS_sigreturn" to ensure the value of szsigcode is * correct. Using the alternative places esigcode at the address * of the data rather than the address one past the data. */ ldr r7, [pc, #12] /* Load SYS_sigreturn */ swi SYS_sigreturn /* Well if that failed we better exit quick ! */ ldr r7, [pc, #8] /* Load SYS_exit */ swi SYS_exit /* Branch back to retry SYS_sigreturn */ b . 
- 16 - +END(sigcode) .word SYS_sigreturn .word SYS_exit .align 0 .global _C_LABEL(esigcode) _C_LABEL(esigcode): .data .global szsigcode szsigcode: .long esigcode-sigcode -END(sigcode) + /* End of locore.S */ Index: stable/10/sys/arm/arm/setstack.s =================================================================== --- stable/10/sys/arm/arm/setstack.s (revision 269795) +++ stable/10/sys/arm/arm/setstack.s (revision 269796) @@ -1,94 +1,94 @@ /* $NetBSD: setstack.S,v 1.1 2001/07/28 13:28:03 chris Exp $ */ /*- * Copyright (c) 1994 Mark Brinicombe. * Copyright (c) 1994 Brini. * All rights reserved. * * This code is derived from software written for Brini by Mark Brinicombe * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Brini. * 4. The name of the company nor the name of the author may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * RiscBSD kernel project * * setstack.S * * Miscellaneous routine to play with the stack pointer in different CPU modes * * Eventually this routine can be inline assembly. * * Created : 17/09/94 * * Based of kate/display/setstack.s * */ #include #include __FBSDID("$FreeBSD$"); /* To set the stack pointer for a particular mode we must switch * to that mode, update the banked r13, and then switch back. * This routine provides an easy way of doing this for any mode. * * r0 = CPU mode * r1 = stackptr */ ENTRY(set_stackptr) mrs r3, cpsr /* Switch to the appropriate mode; r3 keeps the original CPSR */ bic r2, r3, #(PSR_MODE) /* clear the PSR_MODE bits in the copy */ orr r2, r2, r0 /* insert the requested mode from r0 */ msr cpsr_fsxc, r2 mov sp, r1 /* Set the stack pointer */ msr cpsr_fsxc, r3 /* Restore the old mode */ mov pc, lr /* Exit */ - +END(set_stackptr) /* To get the stack pointer for a particular mode we must switch * to that mode copy the banked r13 and then switch back. 
* This routine provides an easy way of doing this for any mode * * r0 = CPU mode */ ENTRY(get_stackptr) mrs r3, cpsr /* Switch to the appropriate mode; r3 keeps the original CPSR */ bic r2, r3, #(PSR_MODE) /* clear the PSR_MODE bits in the copy */ orr r2, r2, r0 /* insert the requested mode from r0 */ msr cpsr_fsxc, r2 mov r0, sp /* Get the stack pointer */ msr cpsr_fsxc, r3 /* Restore the old mode */ mov pc, lr /* Exit with the stack pointer in r0 */ - +END(get_stackptr) /* End of setstack.S */ Index: stable/10/sys/arm/arm/support.S =================================================================== --- stable/10/sys/arm/arm/support.S (revision 269795) +++ stable/10/sys/arm/arm/support.S (revision 269796) @@ -1,2957 +1,2960 @@ /*- * Copyright (c) 2004 Olivier Houchard * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright 2003 Wasabi Systems, Inc. * All rights reserved. 
* * Written by Steve C. Woodford for Wasabi Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed for the NetBSD Project by * Wasabi Systems, Inc. * 4. The name of Wasabi Systems, Inc. may not be used to endorse * or promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1997 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Neil A. 
Carson and Mark Brinicombe * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "assym.s" .L_arm_memcpy: .word _C_LABEL(_arm_memcpy) .L_arm_bzero: .word _C_LABEL(_arm_bzero) .L_min_memcpy_size: .word _C_LABEL(_min_memcpy_size) .L_min_bzero_size: .word _C_LABEL(_min_bzero_size) /* * memset: Sets a block of memory to the specified value * * On entry: * r0 - dest address * r1 - byte to write * r2 - number of bytes to write * * On exit: * r0 - dest address */ /* LINTSTUB: Func: void bzero(void *, size_t) */ ENTRY(bzero) ldr r3, .L_arm_bzero ldr r3, [r3] cmp r3, #0 beq .Lnormal0 ldr r2, .L_min_bzero_size ldr r2, [r2] cmp r1, r2 blt .Lnormal0 stmfd sp!, {r0, r1, lr} mov r2, #0 mov lr, pc mov pc, r3 cmp r0, #0 ldmfd sp!, {r0, r1, lr} RETeq .Lnormal0: mov r3, #0x00 b do_memset - +EEND(bzero) /* LINTSTUB: Func: void *memset(void *, int, size_t) */ ENTRY(memset) and r3, r1, #0xff /* We deal with bytes */ mov r1, r2 do_memset: cmp r1, #0x04 /* Do we have less than 4 bytes */ mov ip, r0 blt .Lmemset_lessthanfour /* Ok first we will word align the address */ ands r2, ip, #0x03 /* Get the bottom two bits */ bne .Lmemset_wordunaligned /* The address is not word aligned */ /* We are now word aligned */ .Lmemset_wordaligned: orr r3, r3, r3, lsl #8 /* Extend value to 16-bits */ #ifdef _ARM_ARCH_5E tst ip, #0x04 /* Quad-align for armv5e */ #else cmp r1, #0x10 #endif orr r3, r3, r3, lsl #16 /* Extend value to 32-bits */ #ifdef _ARM_ARCH_5E subne r1, r1, #0x04 /* Quad-align if necessary */ strne r3, [ip], #0x04 cmp r1, #0x10 #endif blt .Lmemset_loop4 /* If less than 16 then use words */ mov r2, r3 /* Duplicate data */ cmp r1, #0x80 /* If < 128 then skip the big loop */ blt .Lmemset_loop32 /* Do 128 bytes at a time */ .Lmemset_loop128: subs r1, r1, #0x80 #ifdef _ARM_ARCH_5E strged r2, [ip], #0x08 strged r2, [ip], #0x08 strged r2, [ip], #0x08 strged r2, [ip], #0x08 strged r2, [ip], #0x08 strged r2, [ip], #0x08 strged r2, [ip], #0x08 strged r2, [ip], #0x08 strged r2, [ip], #0x08 strged r2, [ip], #0x08 strged r2, [ip], #0x08 
strged r2, [ip], #0x08 strged r2, [ip], #0x08 strged r2, [ip], #0x08 strged r2, [ip], #0x08 strged r2, [ip], #0x08 #else stmgeia ip!, {r2-r3} stmgeia ip!, {r2-r3} stmgeia ip!, {r2-r3} stmgeia ip!, {r2-r3} stmgeia ip!, {r2-r3} stmgeia ip!, {r2-r3} stmgeia ip!, {r2-r3} stmgeia ip!, {r2-r3} stmgeia ip!, {r2-r3} stmgeia ip!, {r2-r3} stmgeia ip!, {r2-r3} stmgeia ip!, {r2-r3} stmgeia ip!, {r2-r3} stmgeia ip!, {r2-r3} stmgeia ip!, {r2-r3} stmgeia ip!, {r2-r3} #endif bgt .Lmemset_loop128 RETeq /* Zero length so just exit */ add r1, r1, #0x80 /* Adjust for extra sub */ /* Do 32 bytes at a time */ .Lmemset_loop32: subs r1, r1, #0x20 #ifdef _ARM_ARCH_5E strged r2, [ip], #0x08 strged r2, [ip], #0x08 strged r2, [ip], #0x08 strged r2, [ip], #0x08 #else stmgeia ip!, {r2-r3} stmgeia ip!, {r2-r3} stmgeia ip!, {r2-r3} stmgeia ip!, {r2-r3} #endif bgt .Lmemset_loop32 RETeq /* Zero length so just exit */ adds r1, r1, #0x10 /* Partially adjust for extra sub */ /* Deal with 16 bytes or more */ #ifdef _ARM_ARCH_5E strged r2, [ip], #0x08 strged r2, [ip], #0x08 #else stmgeia ip!, {r2-r3} stmgeia ip!, {r2-r3} #endif RETeq /* Zero length so just exit */ addlt r1, r1, #0x10 /* Possibly adjust for extra sub */ /* We have at least 4 bytes so copy as words */ .Lmemset_loop4: subs r1, r1, #0x04 strge r3, [ip], #0x04 bgt .Lmemset_loop4 RETeq /* Zero length so just exit */ #ifdef _ARM_ARCH_5E /* Compensate for 64-bit alignment check */ adds r1, r1, #0x04 RETeq cmp r1, #2 #else cmp r1, #-2 #endif strb r3, [ip], #0x01 /* Set 1 byte */ strgeb r3, [ip], #0x01 /* Set another byte */ strgtb r3, [ip] /* and a third */ RET /* Exit */ .Lmemset_wordunaligned: rsb r2, r2, #0x004 strb r3, [ip], #0x01 /* Set 1 byte */ cmp r2, #0x02 strgeb r3, [ip], #0x01 /* Set another byte */ sub r1, r1, r2 strgtb r3, [ip], #0x01 /* and a third */ cmp r1, #0x04 /* More than 4 bytes left? 
*/ bge .Lmemset_wordaligned /* Yup */ .Lmemset_lessthanfour: cmp r1, #0x00 RETeq /* Zero length so exit */ strb r3, [ip], #0x01 /* Set 1 byte */ cmp r1, #0x02 strgeb r3, [ip], #0x01 /* Set another byte */ strgtb r3, [ip] /* and a third */ RET /* Exit */ -END(bzero) END(memset) ENTRY(bcmp) mov ip, r0 cmp r2, #0x06 beq .Lmemcmp_6bytes mov r0, #0x00 /* Are both addresses aligned the same way? */ cmp r2, #0x00 eornes r3, ip, r1 RETeq /* len == 0, or same addresses! */ tst r3, #0x03 subne r2, r2, #0x01 bne .Lmemcmp_bytewise2 /* Badly aligned. Do it the slow way */ /* Word-align the addresses, if necessary */ sub r3, r1, #0x05 ands r3, r3, #0x03 add r3, r3, r3, lsl #1 addne pc, pc, r3, lsl #3 nop /* Compare up to 3 bytes */ ldrb r0, [ip], #0x01 ldrb r3, [r1], #0x01 subs r0, r0, r3 RETne subs r2, r2, #0x01 RETeq /* Compare up to 2 bytes */ ldrb r0, [ip], #0x01 ldrb r3, [r1], #0x01 subs r0, r0, r3 RETne subs r2, r2, #0x01 RETeq /* Compare 1 byte */ ldrb r0, [ip], #0x01 ldrb r3, [r1], #0x01 subs r0, r0, r3 RETne subs r2, r2, #0x01 RETeq /* Compare 4 bytes at a time, if possible */ subs r2, r2, #0x04 bcc .Lmemcmp_bytewise .Lmemcmp_word_aligned: ldr r0, [ip], #0x04 ldr r3, [r1], #0x04 subs r2, r2, #0x04 cmpcs r0, r3 beq .Lmemcmp_word_aligned sub r0, r0, r3 /* Correct for extra subtraction, and check if done */ adds r2, r2, #0x04 cmpeq r0, #0x00 /* If done, did all bytes match? */ RETeq /* Yup. Just return */ /* Re-do the final word byte-wise */ sub ip, ip, #0x04 sub r1, r1, #0x04 .Lmemcmp_bytewise: add r2, r2, #0x03 .Lmemcmp_bytewise2: ldrb r0, [ip], #0x01 ldrb r3, [r1], #0x01 subs r2, r2, #0x01 cmpcs r0, r3 beq .Lmemcmp_bytewise2 sub r0, r0, r3 RET /* * 6 byte compares are very common, thanks to the network stack. * This code is hand-scheduled to reduce the number of stalls for * load results. Everything else being equal, this will be ~32% * faster than a byte-wise memcmp. 
*/ .align 5 .Lmemcmp_6bytes: ldrb r3, [r1, #0x00] /* r3 = b2#0 */ ldrb r0, [ip, #0x00] /* r0 = b1#0 */ ldrb r2, [r1, #0x01] /* r2 = b2#1 */ subs r0, r0, r3 /* r0 = b1#0 - b2#0 */ ldreqb r3, [ip, #0x01] /* r3 = b1#1 */ RETne /* Return if mismatch on #0 */ subs r0, r3, r2 /* r0 = b1#1 - b2#1 */ ldreqb r3, [r1, #0x02] /* r3 = b2#2 */ ldreqb r0, [ip, #0x02] /* r0 = b1#2 */ RETne /* Return if mismatch on #1 */ ldrb r2, [r1, #0x03] /* r2 = b2#3 */ subs r0, r0, r3 /* r0 = b1#2 - b2#2 */ ldreqb r3, [ip, #0x03] /* r3 = b1#3 */ RETne /* Return if mismatch on #2 */ subs r0, r3, r2 /* r0 = b1#3 - b2#3 */ ldreqb r3, [r1, #0x04] /* r3 = b2#4 */ ldreqb r0, [ip, #0x04] /* r0 = b1#4 */ RETne /* Return if mismatch on #3 */ ldrb r2, [r1, #0x05] /* r2 = b2#5 */ subs r0, r0, r3 /* r0 = b1#4 - b2#4 */ ldreqb r3, [ip, #0x05] /* r3 = b1#5 */ RETne /* Return if mismatch on #4 */ sub r0, r3, r2 /* r0 = b1#5 - b2#5 */ RET END(bcmp) ENTRY(bcopy) /* switch the source and destination registers */ eor r0, r1, r0 eor r1, r0, r1 eor r0, r1, r0 -ENTRY(memmove) +EENTRY(memmove) /* Do the buffers overlap? 
*/ cmp r0, r1 RETeq /* Bail now if src/dst are the same */ /* NOTE(review): after cmp r0, r1 the cc condition means r0 < r1 (unsigned), so subcc executes when dst < src and subcs when dst > src; the conditions written in the next two comments look swapped relative to the code -- confirm */ subcc r3, r0, r1 /* if (dst > src) r3 = dst - src */ subcs r3, r1, r0 /* if (src > dst) r3 = src - dst */ cmp r3, r2 /* if (r3 < len) we have an overlap */ bcc PIC_SYM(_C_LABEL(memcpy), PLT) /* Determine copy direction */ cmp r1, r0 bcc .Lmemmove_backwards /* NOTE(review): the flags tested by moveq/RETeq are those of cmp r1, r0 above, not of a length test, and the equal case already returned earlier -- confirm whether the eq condition can ever hold here */ moveq r0, #0 /* Quick abort for len=0 */ RETeq stmdb sp!, {r0, lr} /* memmove() returns dest addr */ subs r2, r2, #4 blt .Lmemmove_fl4 /* less than 4 bytes */ ands r12, r0, #3 bne .Lmemmove_fdestul /* oh unaligned destination addr */ ands r12, r1, #3 bne .Lmemmove_fsrcul /* oh unaligned source addr */ .Lmemmove_ft8: /* We have aligned source and destination */ subs r2, r2, #8 blt .Lmemmove_fl12 /* less than 12 bytes (4 from above) */ subs r2, r2, #0x14 blt .Lmemmove_fl32 /* less than 32 bytes (12 from above) */ stmdb sp!, {r4} /* borrow r4 */ /* blat 32 bytes at a time */ /* XXX for really big copies perhaps we should use more registers */ .Lmemmove_floop32: ldmia r1!, {r3, r4, r12, lr} stmia r0!, {r3, r4, r12, lr} ldmia r1!, {r3, r4, r12, lr} stmia r0!, {r3, r4, r12, lr} subs r2, r2, #0x20 bge .Lmemmove_floop32 cmn r2, #0x10 ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */ stmgeia r0!, {r3, r4, r12, lr} subge r2, r2, #0x10 ldmia sp!, {r4} /* return r4 */ .Lmemmove_fl32: adds r2, r2, #0x14 /* blat 12 bytes at a time */ .Lmemmove_floop12: ldmgeia r1!, {r3, r12, lr} stmgeia r0!, {r3, r12, lr} subges r2, r2, #0x0c bge .Lmemmove_floop12 .Lmemmove_fl12: adds r2, r2, #8 blt .Lmemmove_fl4 subs r2, r2, #4 ldrlt r3, [r1], #4 strlt r3, [r0], #4 ldmgeia r1!, {r3, r12} stmgeia r0!, {r3, r12} subge r2, r2, #4 .Lmemmove_fl4: /* less than 4 bytes to go */ adds r2, r2, #4 ldmeqia sp!, {r0, pc} /* done */ /* copy the crud byte at a time */ cmp r2, #2 ldrb r3, [r1], #1 strb r3, [r0], #1 ldrgeb r3, [r1], #1 strgeb r3, [r0], #1 ldrgtb r3, [r1], #1 strgtb r3, [r0], #1 ldmia sp!, {r0, pc} /* erg - unaligned destination */ .Lmemmove_fdestul: rsb r12, r12, #4 cmp 
r12, #2 /* align destination with byte copies */ ldrb r3, [r1], #1 strb r3, [r0], #1 ldrgeb r3, [r1], #1 strgeb r3, [r0], #1 ldrgtb r3, [r1], #1 strgtb r3, [r0], #1 subs r2, r2, r12 blt .Lmemmove_fl4 /* less the 4 bytes */ ands r12, r1, #3 beq .Lmemmove_ft8 /* we have an aligned source */ /* erg - unaligned source */ /* This is where it gets nasty ... */ .Lmemmove_fsrcul: bic r1, r1, #3 ldr lr, [r1], #4 cmp r12, #2 bgt .Lmemmove_fsrcul3 beq .Lmemmove_fsrcul2 cmp r2, #0x0c blt .Lmemmove_fsrcul1loop4 sub r2, r2, #0x0c stmdb sp!, {r4, r5} .Lmemmove_fsrcul1loop16: #ifdef __ARMEB__ mov r3, lr, lsl #8 #else mov r3, lr, lsr #8 #endif ldmia r1!, {r4, r5, r12, lr} #ifdef __ARMEB__ orr r3, r3, r4, lsr #24 mov r4, r4, lsl #8 orr r4, r4, r5, lsr #24 mov r5, r5, lsl #8 orr r5, r5, r12, lsr #24 mov r12, r12, lsl #8 orr r12, r12, lr, lsr #24 #else orr r3, r3, r4, lsl #24 mov r4, r4, lsr #8 orr r4, r4, r5, lsl #24 mov r5, r5, lsr #8 orr r5, r5, r12, lsl #24 mov r12, r12, lsr #8 orr r12, r12, lr, lsl #24 #endif stmia r0!, {r3-r5, r12} subs r2, r2, #0x10 bge .Lmemmove_fsrcul1loop16 ldmia sp!, {r4, r5} adds r2, r2, #0x0c blt .Lmemmove_fsrcul1l4 .Lmemmove_fsrcul1loop4: #ifdef __ARMEB__ mov r12, lr, lsl #8 #else mov r12, lr, lsr #8 #endif ldr lr, [r1], #4 #ifdef __ARMEB__ orr r12, r12, lr, lsr #24 #else orr r12, r12, lr, lsl #24 #endif str r12, [r0], #4 subs r2, r2, #4 bge .Lmemmove_fsrcul1loop4 .Lmemmove_fsrcul1l4: sub r1, r1, #3 b .Lmemmove_fl4 .Lmemmove_fsrcul2: cmp r2, #0x0c blt .Lmemmove_fsrcul2loop4 sub r2, r2, #0x0c stmdb sp!, {r4, r5} .Lmemmove_fsrcul2loop16: #ifdef __ARMEB__ mov r3, lr, lsl #16 #else mov r3, lr, lsr #16 #endif ldmia r1!, {r4, r5, r12, lr} #ifdef __ARMEB__ orr r3, r3, r4, lsr #16 mov r4, r4, lsl #16 orr r4, r4, r5, lsr #16 mov r5, r5, lsl #16 orr r5, r5, r12, lsr #16 mov r12, r12, lsl #16 orr r12, r12, lr, lsr #16 #else orr r3, r3, r4, lsl #16 mov r4, r4, lsr #16 orr r4, r4, r5, lsl #16 mov r5, r5, lsr #16 orr r5, r5, r12, lsl #16 mov r12, r12, lsr #16 orr r12, 
r12, lr, lsl #16 #endif stmia r0!, {r3-r5, r12} subs r2, r2, #0x10 bge .Lmemmove_fsrcul2loop16 ldmia sp!, {r4, r5} adds r2, r2, #0x0c blt .Lmemmove_fsrcul2l4 .Lmemmove_fsrcul2loop4: #ifdef __ARMEB__ mov r12, lr, lsl #16 #else mov r12, lr, lsr #16 #endif ldr lr, [r1], #4 #ifdef __ARMEB__ orr r12, r12, lr, lsr #16 #else orr r12, r12, lr, lsl #16 #endif str r12, [r0], #4 subs r2, r2, #4 bge .Lmemmove_fsrcul2loop4 .Lmemmove_fsrcul2l4: sub r1, r1, #2 b .Lmemmove_fl4 .Lmemmove_fsrcul3: cmp r2, #0x0c blt .Lmemmove_fsrcul3loop4 sub r2, r2, #0x0c stmdb sp!, {r4, r5} .Lmemmove_fsrcul3loop16: #ifdef __ARMEB__ mov r3, lr, lsl #24 #else mov r3, lr, lsr #24 #endif ldmia r1!, {r4, r5, r12, lr} #ifdef __ARMEB__ orr r3, r3, r4, lsr #8 mov r4, r4, lsl #24 orr r4, r4, r5, lsr #8 mov r5, r5, lsl #24 orr r5, r5, r12, lsr #8 mov r12, r12, lsl #24 orr r12, r12, lr, lsr #8 #else orr r3, r3, r4, lsl #8 mov r4, r4, lsr #24 orr r4, r4, r5, lsl #8 mov r5, r5, lsr #24 orr r5, r5, r12, lsl #8 mov r12, r12, lsr #24 orr r12, r12, lr, lsl #8 #endif stmia r0!, {r3-r5, r12} subs r2, r2, #0x10 bge .Lmemmove_fsrcul3loop16 ldmia sp!, {r4, r5} adds r2, r2, #0x0c blt .Lmemmove_fsrcul3l4 .Lmemmove_fsrcul3loop4: #ifdef __ARMEB__ mov r12, lr, lsl #24 #else mov r12, lr, lsr #24 #endif ldr lr, [r1], #4 #ifdef __ARMEB__ orr r12, r12, lr, lsr #8 #else orr r12, r12, lr, lsl #8 #endif str r12, [r0], #4 subs r2, r2, #4 bge .Lmemmove_fsrcul3loop4 .Lmemmove_fsrcul3l4: sub r1, r1, #1 b .Lmemmove_fl4 .Lmemmove_backwards: add r1, r1, r2 add r0, r0, r2 subs r2, r2, #4 blt .Lmemmove_bl4 /* less than 4 bytes */ ands r12, r0, #3 bne .Lmemmove_bdestul /* oh unaligned destination addr */ ands r12, r1, #3 bne .Lmemmove_bsrcul /* oh unaligned source addr */ .Lmemmove_bt8: /* We have aligned source and destination */ subs r2, r2, #8 blt .Lmemmove_bl12 /* less than 12 bytes (4 from above) */ stmdb sp!, {r4, lr} subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */ blt .Lmemmove_bl32 /* blat 32 bytes at a time */ /* XXX for 
really big copies perhaps we should use more registers */ .Lmemmove_bloop32: ldmdb r1!, {r3, r4, r12, lr} stmdb r0!, {r3, r4, r12, lr} ldmdb r1!, {r3, r4, r12, lr} stmdb r0!, {r3, r4, r12, lr} subs r2, r2, #0x20 bge .Lmemmove_bloop32 .Lmemmove_bl32: cmn r2, #0x10 ldmgedb r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */ stmgedb r0!, {r3, r4, r12, lr} subge r2, r2, #0x10 adds r2, r2, #0x14 ldmgedb r1!, {r3, r12, lr} /* blat a remaining 12 bytes */ stmgedb r0!, {r3, r12, lr} subge r2, r2, #0x0c ldmia sp!, {r4, lr} .Lmemmove_bl12: adds r2, r2, #8 blt .Lmemmove_bl4 subs r2, r2, #4 ldrlt r3, [r1, #-4]! strlt r3, [r0, #-4]! ldmgedb r1!, {r3, r12} stmgedb r0!, {r3, r12} subge r2, r2, #4 .Lmemmove_bl4: /* less than 4 bytes to go */ adds r2, r2, #4 RETeq /* done */ /* copy the crud byte at a time */ cmp r2, #2 ldrb r3, [r1, #-1]! strb r3, [r0, #-1]! ldrgeb r3, [r1, #-1]! strgeb r3, [r0, #-1]! ldrgtb r3, [r1, #-1]! strgtb r3, [r0, #-1]! RET /* erg - unaligned destination */ .Lmemmove_bdestul: cmp r12, #2 /* align destination with byte copies */ ldrb r3, [r1, #-1]! strb r3, [r0, #-1]! ldrgeb r3, [r1, #-1]! strgeb r3, [r0, #-1]! ldrgtb r3, [r1, #-1]! strgtb r3, [r0, #-1]! subs r2, r2, r12 blt .Lmemmove_bl4 /* less than 4 bytes to go */ ands r12, r1, #3 beq .Lmemmove_bt8 /* we have an aligned source */ /* erg - unaligned source */ /* This is where it gets nasty ... 
*/ .Lmemmove_bsrcul: bic r1, r1, #3 ldr r3, [r1, #0] cmp r12, #2 blt .Lmemmove_bsrcul1 beq .Lmemmove_bsrcul2 cmp r2, #0x0c blt .Lmemmove_bsrcul3loop4 sub r2, r2, #0x0c stmdb sp!, {r4, r5, lr} .Lmemmove_bsrcul3loop16: #ifdef __ARMEB__ mov lr, r3, lsr #8 #else mov lr, r3, lsl #8 #endif ldmdb r1!, {r3-r5, r12} #ifdef __ARMEB__ orr lr, lr, r12, lsl #24 mov r12, r12, lsr #8 orr r12, r12, r5, lsl #24 mov r5, r5, lsr #8 orr r5, r5, r4, lsl #24 mov r4, r4, lsr #8 orr r4, r4, r3, lsl #24 #else orr lr, lr, r12, lsr #24 mov r12, r12, lsl #8 orr r12, r12, r5, lsr #24 mov r5, r5, lsl #8 orr r5, r5, r4, lsr #24 mov r4, r4, lsl #8 orr r4, r4, r3, lsr #24 #endif stmdb r0!, {r4, r5, r12, lr} subs r2, r2, #0x10 bge .Lmemmove_bsrcul3loop16 ldmia sp!, {r4, r5, lr} adds r2, r2, #0x0c blt .Lmemmove_bsrcul3l4 .Lmemmove_bsrcul3loop4: #ifdef __ARMEB__ mov r12, r3, lsr #8 #else mov r12, r3, lsl #8 #endif ldr r3, [r1, #-4]! #ifdef __ARMEB__ orr r12, r12, r3, lsl #24 #else orr r12, r12, r3, lsr #24 #endif str r12, [r0, #-4]! subs r2, r2, #4 bge .Lmemmove_bsrcul3loop4 .Lmemmove_bsrcul3l4: add r1, r1, #3 b .Lmemmove_bl4 .Lmemmove_bsrcul2: cmp r2, #0x0c blt .Lmemmove_bsrcul2loop4 sub r2, r2, #0x0c stmdb sp!, {r4, r5, lr} .Lmemmove_bsrcul2loop16: #ifdef __ARMEB__ mov lr, r3, lsr #16 #else mov lr, r3, lsl #16 #endif ldmdb r1!, {r3-r5, r12} #ifdef __ARMEB__ orr lr, lr, r12, lsl #16 mov r12, r12, lsr #16 orr r12, r12, r5, lsl #16 mov r5, r5, lsr #16 orr r5, r5, r4, lsl #16 mov r4, r4, lsr #16 orr r4, r4, r3, lsl #16 #else orr lr, lr, r12, lsr #16 mov r12, r12, lsl #16 orr r12, r12, r5, lsr #16 mov r5, r5, lsl #16 orr r5, r5, r4, lsr #16 mov r4, r4, lsl #16 orr r4, r4, r3, lsr #16 #endif stmdb r0!, {r4, r5, r12, lr} subs r2, r2, #0x10 bge .Lmemmove_bsrcul2loop16 ldmia sp!, {r4, r5, lr} adds r2, r2, #0x0c blt .Lmemmove_bsrcul2l4 .Lmemmove_bsrcul2loop4: #ifdef __ARMEB__ mov r12, r3, lsr #16 #else mov r12, r3, lsl #16 #endif ldr r3, [r1, #-4]! 
#ifdef __ARMEB__ orr r12, r12, r3, lsl #16 #else orr r12, r12, r3, lsr #16 #endif str r12, [r0, #-4]! subs r2, r2, #4 bge .Lmemmove_bsrcul2loop4 .Lmemmove_bsrcul2l4: add r1, r1, #2 b .Lmemmove_bl4 .Lmemmove_bsrcul1: cmp r2, #0x0c blt .Lmemmove_bsrcul1loop4 sub r2, r2, #0x0c stmdb sp!, {r4, r5, lr} .Lmemmove_bsrcul1loop32: #ifdef __ARMEB__ mov lr, r3, lsr #24 #else mov lr, r3, lsl #24 #endif ldmdb r1!, {r3-r5, r12} #ifdef __ARMEB__ orr lr, lr, r12, lsl #8 mov r12, r12, lsr #24 orr r12, r12, r5, lsl #8 mov r5, r5, lsr #24 orr r5, r5, r4, lsl #8 mov r4, r4, lsr #24 orr r4, r4, r3, lsl #8 #else orr lr, lr, r12, lsr #8 mov r12, r12, lsl #24 orr r12, r12, r5, lsr #8 mov r5, r5, lsl #24 orr r5, r5, r4, lsr #8 mov r4, r4, lsl #24 orr r4, r4, r3, lsr #8 #endif stmdb r0!, {r4, r5, r12, lr} subs r2, r2, #0x10 bge .Lmemmove_bsrcul1loop32 ldmia sp!, {r4, r5, lr} adds r2, r2, #0x0c blt .Lmemmove_bsrcul1l4 .Lmemmove_bsrcul1loop4: #ifdef __ARMEB__ mov r12, r3, lsr #24 #else mov r12, r3, lsl #24 #endif ldr r3, [r1, #-4]! #ifdef __ARMEB__ orr r12, r12, r3, lsl #8 #else orr r12, r12, r3, lsr #8 #endif str r12, [r0, #-4]! 
subs r2, r2, #4 bge .Lmemmove_bsrcul1loop4 .Lmemmove_bsrcul1l4: add r1, r1, #1 b .Lmemmove_bl4 +EEND(memmove) END(bcopy) -END(memmove) #if !defined(_ARM_ARCH_5E) ENTRY(memcpy) /* save leaf functions having to store this away */ /* Do not check arm_memcpy if we're running from flash */ #if defined(FLASHADDR) && defined(PHYSADDR) #if FLASHADDR > PHYSADDR ldr r3, =FLASHADDR cmp r3, pc bls .Lnormal #else ldr r3, =FLASHADDR cmp r3, pc bhi .Lnormal #endif #endif ldr r3, .L_arm_memcpy ldr r3, [r3] cmp r3, #0 beq .Lnormal ldr r3, .L_min_memcpy_size ldr r3, [r3] cmp r2, r3 blt .Lnormal stmfd sp!, {r0-r2, r4, lr} mov r3, #0 ldr r4, .L_arm_memcpy mov lr, pc ldr pc, [r4] cmp r0, #0 ldmfd sp!, {r0-r2, r4, lr} RETeq .Lnormal: stmdb sp!, {r0, lr} /* memcpy() returns dest addr */ subs r2, r2, #4 blt .Lmemcpy_l4 /* less than 4 bytes */ ands r12, r0, #3 bne .Lmemcpy_destul /* oh unaligned destination addr */ ands r12, r1, #3 bne .Lmemcpy_srcul /* oh unaligned source addr */ .Lmemcpy_t8: /* We have aligned source and destination */ subs r2, r2, #8 blt .Lmemcpy_l12 /* less than 12 bytes (4 from above) */ subs r2, r2, #0x14 blt .Lmemcpy_l32 /* less than 32 bytes (12 from above) */ stmdb sp!, {r4} /* borrow r4 */ /* blat 32 bytes at a time */ /* XXX for really big copies perhaps we should use more registers */ .Lmemcpy_loop32: ldmia r1!, {r3, r4, r12, lr} stmia r0!, {r3, r4, r12, lr} ldmia r1!, {r3, r4, r12, lr} stmia r0!, {r3, r4, r12, lr} subs r2, r2, #0x20 bge .Lmemcpy_loop32 cmn r2, #0x10 ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */ stmgeia r0!, {r3, r4, r12, lr} subge r2, r2, #0x10 ldmia sp!, {r4} /* return r4 */ .Lmemcpy_l32: adds r2, r2, #0x14 /* blat 12 bytes at a time */ .Lmemcpy_loop12: ldmgeia r1!, {r3, r12, lr} stmgeia r0!, {r3, r12, lr} subges r2, r2, #0x0c bge .Lmemcpy_loop12 .Lmemcpy_l12: adds r2, r2, #8 blt .Lmemcpy_l4 subs r2, r2, #4 ldrlt r3, [r1], #4 strlt r3, [r0], #4 ldmgeia r1!, {r3, r12} stmgeia r0!, {r3, r12} subge r2, r2, #4 .Lmemcpy_l4: /* 
less than 4 bytes to go */ adds r2, r2, #4 #ifdef __APCS_26_ ldmeqia sp!, {r0, pc}^ /* done */ #else ldmeqia sp!, {r0, pc} /* done */ #endif /* copy the crud byte at a time */ cmp r2, #2 ldrb r3, [r1], #1 strb r3, [r0], #1 ldrgeb r3, [r1], #1 strgeb r3, [r0], #1 ldrgtb r3, [r1], #1 strgtb r3, [r0], #1 ldmia sp!, {r0, pc} /* erg - unaligned destination */ .Lmemcpy_destul: rsb r12, r12, #4 cmp r12, #2 /* align destination with byte copies */ ldrb r3, [r1], #1 strb r3, [r0], #1 ldrgeb r3, [r1], #1 strgeb r3, [r0], #1 ldrgtb r3, [r1], #1 strgtb r3, [r0], #1 subs r2, r2, r12 blt .Lmemcpy_l4 /* less the 4 bytes */ ands r12, r1, #3 beq .Lmemcpy_t8 /* we have an aligned source */ /* erg - unaligned source */ /* This is where it gets nasty ... */ .Lmemcpy_srcul: bic r1, r1, #3 ldr lr, [r1], #4 cmp r12, #2 bgt .Lmemcpy_srcul3 beq .Lmemcpy_srcul2 cmp r2, #0x0c blt .Lmemcpy_srcul1loop4 sub r2, r2, #0x0c stmdb sp!, {r4, r5} .Lmemcpy_srcul1loop16: mov r3, lr, lsr #8 ldmia r1!, {r4, r5, r12, lr} orr r3, r3, r4, lsl #24 mov r4, r4, lsr #8 orr r4, r4, r5, lsl #24 mov r5, r5, lsr #8 orr r5, r5, r12, lsl #24 mov r12, r12, lsr #8 orr r12, r12, lr, lsl #24 stmia r0!, {r3-r5, r12} subs r2, r2, #0x10 bge .Lmemcpy_srcul1loop16 ldmia sp!, {r4, r5} adds r2, r2, #0x0c blt .Lmemcpy_srcul1l4 .Lmemcpy_srcul1loop4: mov r12, lr, lsr #8 ldr lr, [r1], #4 orr r12, r12, lr, lsl #24 str r12, [r0], #4 subs r2, r2, #4 bge .Lmemcpy_srcul1loop4 .Lmemcpy_srcul1l4: sub r1, r1, #3 b .Lmemcpy_l4 .Lmemcpy_srcul2: cmp r2, #0x0c blt .Lmemcpy_srcul2loop4 sub r2, r2, #0x0c stmdb sp!, {r4, r5} .Lmemcpy_srcul2loop16: mov r3, lr, lsr #16 ldmia r1!, {r4, r5, r12, lr} orr r3, r3, r4, lsl #16 mov r4, r4, lsr #16 orr r4, r4, r5, lsl #16 mov r5, r5, lsr #16 orr r5, r5, r12, lsl #16 mov r12, r12, lsr #16 orr r12, r12, lr, lsl #16 stmia r0!, {r3-r5, r12} subs r2, r2, #0x10 bge .Lmemcpy_srcul2loop16 ldmia sp!, {r4, r5} adds r2, r2, #0x0c blt .Lmemcpy_srcul2l4 .Lmemcpy_srcul2loop4: mov r12, lr, lsr #16 ldr lr, [r1], #4 orr 
r12, r12, lr, lsl #16 str r12, [r0], #4 subs r2, r2, #4 bge .Lmemcpy_srcul2loop4 .Lmemcpy_srcul2l4: sub r1, r1, #2 b .Lmemcpy_l4 .Lmemcpy_srcul3: cmp r2, #0x0c blt .Lmemcpy_srcul3loop4 sub r2, r2, #0x0c stmdb sp!, {r4, r5} .Lmemcpy_srcul3loop16: mov r3, lr, lsr #24 ldmia r1!, {r4, r5, r12, lr} orr r3, r3, r4, lsl #8 mov r4, r4, lsr #24 orr r4, r4, r5, lsl #8 mov r5, r5, lsr #24 orr r5, r5, r12, lsl #8 mov r12, r12, lsr #24 orr r12, r12, lr, lsl #8 stmia r0!, {r3-r5, r12} subs r2, r2, #0x10 bge .Lmemcpy_srcul3loop16 ldmia sp!, {r4, r5} adds r2, r2, #0x0c blt .Lmemcpy_srcul3l4 .Lmemcpy_srcul3loop4: mov r12, lr, lsr #24 ldr lr, [r1], #4 orr r12, r12, lr, lsl #8 str r12, [r0], #4 subs r2, r2, #4 bge .Lmemcpy_srcul3loop4 .Lmemcpy_srcul3l4: sub r1, r1, #1 b .Lmemcpy_l4 END(memcpy) #else /* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */ ENTRY(memcpy) pld [r1] cmp r2, #0x0c ble .Lmemcpy_short /* <= 12 bytes */ #ifdef FLASHADDR #if FLASHADDR > PHYSADDR ldr r3, =FLASHADDR cmp r3, pc bls .Lnormal #else ldr r3, =FLASHADDR cmp r3, pc bhi .Lnormal #endif #endif ldr r3, .L_arm_memcpy ldr r3, [r3] cmp r3, #0 beq .Lnormal ldr r3, .L_min_memcpy_size ldr r3, [r3] cmp r2, r3 blt .Lnormal stmfd sp!, {r0-r2, r4, lr} mov r3, #0 ldr r4, .L_arm_memcpy mov lr, pc ldr pc, [r4] cmp r0, #0 ldmfd sp!, {r0-r2, r4, lr} RETeq .Lnormal: mov r3, r0 /* We must not clobber r0 */ /* Word-align the destination buffer */ ands ip, r3, #0x03 /* Already word aligned? */ beq .Lmemcpy_wordaligned /* Yup */ cmp ip, #0x02 ldrb ip, [r1], #0x01 sub r2, r2, #0x01 strb ip, [r3], #0x01 ldrleb ip, [r1], #0x01 suble r2, r2, #0x01 strleb ip, [r3], #0x01 ldrltb ip, [r1], #0x01 sublt r2, r2, #0x01 strltb ip, [r3], #0x01 /* Destination buffer is now word aligned */ .Lmemcpy_wordaligned: ands ip, r1, #0x03 /* Is src also word-aligned? */ bne .Lmemcpy_bad_align /* Nope. Things just got bad */ /* Quad-align the destination buffer */ tst r3, #0x07 /* Already quad aligned? 
*/ ldrne ip, [r1], #0x04 stmfd sp!, {r4-r9} /* Free up some registers */ subne r2, r2, #0x04 strne ip, [r3], #0x04 /* Destination buffer quad aligned, source is at least word aligned */ subs r2, r2, #0x80 blt .Lmemcpy_w_lessthan128 /* Copy 128 bytes at a time */ .Lmemcpy_w_loop128: ldr r4, [r1], #0x04 /* LD:00-03 */ ldr r5, [r1], #0x04 /* LD:04-07 */ pld [r1, #0x18] /* Prefetch 0x20 */ ldr r6, [r1], #0x04 /* LD:08-0b */ ldr r7, [r1], #0x04 /* LD:0c-0f */ ldr r8, [r1], #0x04 /* LD:10-13 */ ldr r9, [r1], #0x04 /* LD:14-17 */ strd r4, [r3], #0x08 /* ST:00-07 */ ldr r4, [r1], #0x04 /* LD:18-1b */ ldr r5, [r1], #0x04 /* LD:1c-1f */ strd r6, [r3], #0x08 /* ST:08-0f */ ldr r6, [r1], #0x04 /* LD:20-23 */ ldr r7, [r1], #0x04 /* LD:24-27 */ pld [r1, #0x18] /* Prefetch 0x40 */ strd r8, [r3], #0x08 /* ST:10-17 */ ldr r8, [r1], #0x04 /* LD:28-2b */ ldr r9, [r1], #0x04 /* LD:2c-2f */ strd r4, [r3], #0x08 /* ST:18-1f */ ldr r4, [r1], #0x04 /* LD:30-33 */ ldr r5, [r1], #0x04 /* LD:34-37 */ strd r6, [r3], #0x08 /* ST:20-27 */ ldr r6, [r1], #0x04 /* LD:38-3b */ ldr r7, [r1], #0x04 /* LD:3c-3f */ strd r8, [r3], #0x08 /* ST:28-2f */ ldr r8, [r1], #0x04 /* LD:40-43 */ ldr r9, [r1], #0x04 /* LD:44-47 */ pld [r1, #0x18] /* Prefetch 0x60 */ strd r4, [r3], #0x08 /* ST:30-37 */ ldr r4, [r1], #0x04 /* LD:48-4b */ ldr r5, [r1], #0x04 /* LD:4c-4f */ strd r6, [r3], #0x08 /* ST:38-3f */ ldr r6, [r1], #0x04 /* LD:50-53 */ ldr r7, [r1], #0x04 /* LD:54-57 */ strd r8, [r3], #0x08 /* ST:40-47 */ ldr r8, [r1], #0x04 /* LD:58-5b */ ldr r9, [r1], #0x04 /* LD:5c-5f */ strd r4, [r3], #0x08 /* ST:48-4f */ ldr r4, [r1], #0x04 /* LD:60-63 */ ldr r5, [r1], #0x04 /* LD:64-67 */ pld [r1, #0x18] /* Prefetch 0x80 */ strd r6, [r3], #0x08 /* ST:50-57 */ ldr r6, [r1], #0x04 /* LD:68-6b */ ldr r7, [r1], #0x04 /* LD:6c-6f */ strd r8, [r3], #0x08 /* ST:58-5f */ ldr r8, [r1], #0x04 /* LD:70-73 */ ldr r9, [r1], #0x04 /* LD:74-77 */ strd r4, [r3], #0x08 /* ST:60-67 */ ldr r4, [r1], #0x04 /* LD:78-7b */ ldr r5, [r1], #0x04 
/* LD:7c-7f */ strd r6, [r3], #0x08 /* ST:68-6f */ strd r8, [r3], #0x08 /* ST:70-77 */ subs r2, r2, #0x80 strd r4, [r3], #0x08 /* ST:78-7f */ bge .Lmemcpy_w_loop128 .Lmemcpy_w_lessthan128: adds r2, r2, #0x80 /* Adjust for extra sub */ ldmeqfd sp!, {r4-r9} RETeq /* Return now if done */ subs r2, r2, #0x20 blt .Lmemcpy_w_lessthan32 /* Copy 32 bytes at a time */ .Lmemcpy_w_loop32: ldr r4, [r1], #0x04 ldr r5, [r1], #0x04 pld [r1, #0x18] ldr r6, [r1], #0x04 ldr r7, [r1], #0x04 ldr r8, [r1], #0x04 ldr r9, [r1], #0x04 strd r4, [r3], #0x08 ldr r4, [r1], #0x04 ldr r5, [r1], #0x04 strd r6, [r3], #0x08 strd r8, [r3], #0x08 subs r2, r2, #0x20 strd r4, [r3], #0x08 bge .Lmemcpy_w_loop32 .Lmemcpy_w_lessthan32: adds r2, r2, #0x20 /* Adjust for extra sub */ ldmeqfd sp!, {r4-r9} RETeq /* Return now if done */ and r4, r2, #0x18 rsbs r4, r4, #0x18 addne pc, pc, r4, lsl #1 nop /* At least 24 bytes remaining */ ldr r4, [r1], #0x04 ldr r5, [r1], #0x04 sub r2, r2, #0x08 strd r4, [r3], #0x08 /* At least 16 bytes remaining */ ldr r4, [r1], #0x04 ldr r5, [r1], #0x04 sub r2, r2, #0x08 strd r4, [r3], #0x08 /* At least 8 bytes remaining */ ldr r4, [r1], #0x04 ldr r5, [r1], #0x04 subs r2, r2, #0x08 strd r4, [r3], #0x08 /* Less than 8 bytes remaining */ ldmfd sp!, {r4-r9} RETeq /* Return now if done */ subs r2, r2, #0x04 ldrge ip, [r1], #0x04 strge ip, [r3], #0x04 RETeq /* Return now if done */ addlt r2, r2, #0x04 ldrb ip, [r1], #0x01 cmp r2, #0x02 ldrgeb r2, [r1], #0x01 strb ip, [r3], #0x01 ldrgtb ip, [r1] strgeb r2, [r3], #0x01 strgtb ip, [r3] RET /* * At this point, it has not been possible to word align both buffers. * The destination buffer is word aligned, but the source buffer is not. 
*/ .Lmemcpy_bad_align: stmfd sp!, {r4-r7} bic r1, r1, #0x03 cmp ip, #2 ldr ip, [r1], #0x04 bgt .Lmemcpy_bad3 beq .Lmemcpy_bad2 b .Lmemcpy_bad1 .Lmemcpy_bad1_loop16: #ifdef __ARMEB__ mov r4, ip, lsl #8 #else mov r4, ip, lsr #8 #endif ldr r5, [r1], #0x04 pld [r1, #0x018] ldr r6, [r1], #0x04 ldr r7, [r1], #0x04 ldr ip, [r1], #0x04 #ifdef __ARMEB__ orr r4, r4, r5, lsr #24 mov r5, r5, lsl #8 orr r5, r5, r6, lsr #24 mov r6, r6, lsl #8 orr r6, r6, r7, lsr #24 mov r7, r7, lsl #8 orr r7, r7, ip, lsr #24 #else orr r4, r4, r5, lsl #24 mov r5, r5, lsr #8 orr r5, r5, r6, lsl #24 mov r6, r6, lsr #8 orr r6, r6, r7, lsl #24 mov r7, r7, lsr #8 orr r7, r7, ip, lsl #24 #endif str r4, [r3], #0x04 str r5, [r3], #0x04 str r6, [r3], #0x04 str r7, [r3], #0x04 .Lmemcpy_bad1: subs r2, r2, #0x10 bge .Lmemcpy_bad1_loop16 adds r2, r2, #0x10 ldmeqfd sp!, {r4-r7} RETeq /* Return now if done */ subs r2, r2, #0x04 sublt r1, r1, #0x03 blt .Lmemcpy_bad_done .Lmemcpy_bad1_loop4: #ifdef __ARMEB__ mov r4, ip, lsl #8 #else mov r4, ip, lsr #8 #endif ldr ip, [r1], #0x04 subs r2, r2, #0x04 #ifdef __ARMEB__ orr r4, r4, ip, lsr #24 #else orr r4, r4, ip, lsl #24 #endif str r4, [r3], #0x04 bge .Lmemcpy_bad1_loop4 sub r1, r1, #0x03 b .Lmemcpy_bad_done .Lmemcpy_bad2_loop16: #ifdef __ARMEB__ mov r4, ip, lsl #16 #else mov r4, ip, lsr #16 #endif ldr r5, [r1], #0x04 pld [r1, #0x018] ldr r6, [r1], #0x04 ldr r7, [r1], #0x04 ldr ip, [r1], #0x04 #ifdef __ARMEB__ orr r4, r4, r5, lsr #16 mov r5, r5, lsl #16 orr r5, r5, r6, lsr #16 mov r6, r6, lsl #16 orr r6, r6, r7, lsr #16 mov r7, r7, lsl #16 orr r7, r7, ip, lsr #16 #else orr r4, r4, r5, lsl #16 mov r5, r5, lsr #16 orr r5, r5, r6, lsl #16 mov r6, r6, lsr #16 orr r6, r6, r7, lsl #16 mov r7, r7, lsr #16 orr r7, r7, ip, lsl #16 #endif str r4, [r3], #0x04 str r5, [r3], #0x04 str r6, [r3], #0x04 str r7, [r3], #0x04 .Lmemcpy_bad2: subs r2, r2, #0x10 bge .Lmemcpy_bad2_loop16 adds r2, r2, #0x10 ldmeqfd sp!, {r4-r7} RETeq /* Return now if done */ subs r2, r2, #0x04 sublt r1, r1, 
#0x02 blt .Lmemcpy_bad_done .Lmemcpy_bad2_loop4: #ifdef __ARMEB__ mov r4, ip, lsl #16 #else mov r4, ip, lsr #16 #endif ldr ip, [r1], #0x04 subs r2, r2, #0x04 #ifdef __ARMEB__ orr r4, r4, ip, lsr #16 #else orr r4, r4, ip, lsl #16 #endif str r4, [r3], #0x04 bge .Lmemcpy_bad2_loop4 sub r1, r1, #0x02 b .Lmemcpy_bad_done .Lmemcpy_bad3_loop16: #ifdef __ARMEB__ mov r4, ip, lsl #24 #else mov r4, ip, lsr #24 #endif ldr r5, [r1], #0x04 pld [r1, #0x018] ldr r6, [r1], #0x04 ldr r7, [r1], #0x04 ldr ip, [r1], #0x04 #ifdef __ARMEB__ orr r4, r4, r5, lsr #8 mov r5, r5, lsl #24 orr r5, r5, r6, lsr #8 mov r6, r6, lsl #24 orr r6, r6, r7, lsr #8 mov r7, r7, lsl #24 orr r7, r7, ip, lsr #8 #else orr r4, r4, r5, lsl #8 mov r5, r5, lsr #24 orr r5, r5, r6, lsl #8 mov r6, r6, lsr #24 orr r6, r6, r7, lsl #8 mov r7, r7, lsr #24 orr r7, r7, ip, lsl #8 #endif str r4, [r3], #0x04 str r5, [r3], #0x04 str r6, [r3], #0x04 str r7, [r3], #0x04 .Lmemcpy_bad3: subs r2, r2, #0x10 bge .Lmemcpy_bad3_loop16 adds r2, r2, #0x10 ldmeqfd sp!, {r4-r7} RETeq /* Return now if done */ subs r2, r2, #0x04 sublt r1, r1, #0x01 blt .Lmemcpy_bad_done .Lmemcpy_bad3_loop4: #ifdef __ARMEB__ mov r4, ip, lsl #24 #else mov r4, ip, lsr #24 #endif ldr ip, [r1], #0x04 subs r2, r2, #0x04 #ifdef __ARMEB__ orr r4, r4, ip, lsr #8 #else orr r4, r4, ip, lsl #8 #endif str r4, [r3], #0x04 bge .Lmemcpy_bad3_loop4 sub r1, r1, #0x01 .Lmemcpy_bad_done: ldmfd sp!, {r4-r7} adds r2, r2, #0x04 RETeq ldrb ip, [r1], #0x01 cmp r2, #0x02 ldrgeb r2, [r1], #0x01 strb ip, [r3], #0x01 ldrgtb ip, [r1] strgeb r2, [r3], #0x01 strgtb ip, [r3] RET /* * Handle short copies (less than 16 bytes), possibly misaligned. * Some of these are *very* common, thanks to the network stack, * and so are handled specially. 
*/ .Lmemcpy_short: add pc, pc, r2, lsl #2 nop RET /* 0x00 */ b .Lmemcpy_bytewise /* 0x01 */ b .Lmemcpy_bytewise /* 0x02 */ b .Lmemcpy_bytewise /* 0x03 */ b .Lmemcpy_4 /* 0x04 */ b .Lmemcpy_bytewise /* 0x05 */ b .Lmemcpy_6 /* 0x06 */ b .Lmemcpy_bytewise /* 0x07 */ b .Lmemcpy_8 /* 0x08 */ b .Lmemcpy_bytewise /* 0x09 */ b .Lmemcpy_bytewise /* 0x0a */ b .Lmemcpy_bytewise /* 0x0b */ b .Lmemcpy_c /* 0x0c */ .Lmemcpy_bytewise: mov r3, r0 /* We must not clobber r0 */ ldrb ip, [r1], #0x01 1: subs r2, r2, #0x01 strb ip, [r3], #0x01 ldrneb ip, [r1], #0x01 bne 1b RET /****************************************************************************** * Special case for 4 byte copies */ #define LMEMCPY_4_LOG2 6 /* 64 bytes */ #define LMEMCPY_4_PAD .align LMEMCPY_4_LOG2 LMEMCPY_4_PAD .Lmemcpy_4: and r2, r1, #0x03 orr r2, r2, r0, lsl #2 ands r2, r2, #0x0f sub r3, pc, #0x14 addne pc, r3, r2, lsl #LMEMCPY_4_LOG2 /* * 0000: dst is 32-bit aligned, src is 32-bit aligned */ ldr r2, [r1] str r2, [r0] RET LMEMCPY_4_PAD /* * 0001: dst is 32-bit aligned, src is 8-bit aligned */ ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ ldr r2, [r1, #3] /* BE:r2 = 3xxx LE:r2 = xxx3 */ #ifdef __ARMEB__ mov r3, r3, lsl #8 /* r3 = 012. */ orr r3, r3, r2, lsr #24 /* r3 = 0123 */ #else mov r3, r3, lsr #8 /* r3 = .210 */ orr r3, r3, r2, lsl #24 /* r3 = 3210 */ #endif str r3, [r0] RET LMEMCPY_4_PAD /* * 0010: dst is 32-bit aligned, src is 16-bit aligned */ #ifdef __ARMEB__ ldrh r3, [r1] ldrh r2, [r1, #0x02] #else ldrh r3, [r1, #0x02] ldrh r2, [r1] #endif orr r3, r2, r3, lsl #16 str r3, [r0] RET LMEMCPY_4_PAD /* * 0011: dst is 32-bit aligned, src is 8-bit aligned */ ldr r3, [r1, #-3] /* BE:r3 = xxx0 LE:r3 = 0xxx */ ldr r2, [r1, #1] /* BE:r2 = 123x LE:r2 = x321 */ #ifdef __ARMEB__ mov r3, r3, lsl #24 /* r3 = 0... 
*/ orr r3, r3, r2, lsr #8 /* r3 = 0123 */ #else mov r3, r3, lsr #24 /* r3 = ...0 */ orr r3, r3, r2, lsl #8 /* r3 = 3210 */ #endif str r3, [r0] RET LMEMCPY_4_PAD /* * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned */ ldr r2, [r1] #ifdef __ARMEB__ strb r2, [r0, #0x03] mov r3, r2, lsr #8 mov r1, r2, lsr #24 strb r1, [r0] #else strb r2, [r0] mov r3, r2, lsr #8 mov r1, r2, lsr #24 strb r1, [r0, #0x03] #endif strh r3, [r0, #0x01] RET LMEMCPY_4_PAD /* * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1) */ ldrb r2, [r1] ldrh r3, [r1, #0x01] ldrb r1, [r1, #0x03] strb r2, [r0] strh r3, [r0, #0x01] strb r1, [r0, #0x03] RET LMEMCPY_4_PAD /* * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned */ ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ #ifdef __ARMEB__ mov r1, r2, lsr #8 /* r1 = ...0 */ strb r1, [r0] mov r2, r2, lsl #8 /* r2 = .01. */ orr r2, r2, r3, lsr #8 /* r2 = .012 */ #else strb r2, [r0] mov r2, r2, lsr #8 /* r2 = ...1 */ orr r2, r2, r3, lsl #8 /* r2 = .321 */ mov r3, r3, lsr #8 /* r3 = ...3 */ #endif strh r2, [r0, #0x01] strb r3, [r0, #0x03] RET LMEMCPY_4_PAD /* * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3) */ ldrb r2, [r1] ldrh r3, [r1, #0x01] ldrb r1, [r1, #0x03] strb r2, [r0] strh r3, [r0, #0x01] strb r1, [r0, #0x03] RET LMEMCPY_4_PAD /* * 1000: dst is 16-bit aligned, src is 32-bit aligned */ ldr r2, [r1] #ifdef __ARMEB__ strh r2, [r0, #0x02] mov r3, r2, lsr #16 strh r3, [r0] #else strh r2, [r0] mov r3, r2, lsr #16 strh r3, [r0, #0x02] #endif RET LMEMCPY_4_PAD /* * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1) */ ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */ mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ strh r1, [r0] #ifdef __ARMEB__ mov r2, r2, lsl #8 /* r2 = 012. 
*/ orr r2, r2, r3, lsr #24 /* r2 = 0123 */ #else mov r2, r2, lsr #24 /* r2 = ...2 */ orr r2, r2, r3, lsl #8 /* r2 = xx32 */ #endif strh r2, [r0, #0x02] RET LMEMCPY_4_PAD /* * 1010: dst is 16-bit aligned, src is 16-bit aligned */ ldrh r2, [r1] ldrh r3, [r1, #0x02] strh r2, [r0] strh r3, [r0, #0x02] RET LMEMCPY_4_PAD /* * 1011: dst is 16-bit aligned, src is 8-bit aligned */ ldr r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */ ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */ mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */ strh r1, [r0, #0x02] #ifdef __ARMEB__ mov r3, r3, lsr #24 /* r3 = ...1 */ orr r3, r3, r2, lsl #8 /* r3 = xx01 */ #else mov r3, r3, lsl #8 /* r3 = 321. */ orr r3, r3, r2, lsr #24 /* r3 = 3210 */ #endif strh r3, [r0] RET LMEMCPY_4_PAD /* * 1100: dst is 8-bit aligned, src is 32-bit aligned */ ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ #ifdef __ARMEB__ strb r2, [r0, #0x03] mov r3, r2, lsr #8 mov r1, r2, lsr #24 strh r3, [r0, #0x01] strb r1, [r0] #else strb r2, [r0] mov r3, r2, lsr #8 mov r1, r2, lsr #24 strh r3, [r0, #0x01] strb r1, [r0, #0x03] #endif RET LMEMCPY_4_PAD /* * 1101: dst is 8-bit aligned, src is 8-bit aligned */ ldrb r2, [r1] ldrh r3, [r1, #0x01] ldrb r1, [r1, #0x03] strb r2, [r0] strh r3, [r0, #0x01] strb r1, [r0, #0x03] RET LMEMCPY_4_PAD /* * 1110: dst is 8-bit aligned, src is 16-bit aligned */ #ifdef __ARMEB__ ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ strb r3, [r0, #0x03] mov r3, r3, lsr #8 /* r3 = ...2 */ orr r3, r3, r2, lsl #8 /* r3 = ..12 */ strh r3, [r0, #0x01] mov r2, r2, lsr #8 /* r2 = ...0 */ strb r2, [r0] #else ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ strb r2, [r0] mov r2, r2, lsr #8 /* r2 = ...1 */ orr r2, r2, r3, lsl #8 /* r2 = .321 */ strh r2, [r0, #0x01] mov r3, r3, lsr #8 /* r3 = ...3 */ strb r3, [r0, #0x03] #endif RET LMEMCPY_4_PAD /* * 1111: dst is 8-bit aligned, src is 8-bit aligned */ ldrb r2, [r1] ldrh r3, 
[r1, #0x01] ldrb r1, [r1, #0x03] strb r2, [r0] strh r3, [r0, #0x01] strb r1, [r0, #0x03] RET LMEMCPY_4_PAD /****************************************************************************** * Special case for 6 byte copies */ #define LMEMCPY_6_LOG2 6 /* 64 bytes */ #define LMEMCPY_6_PAD .align LMEMCPY_6_LOG2 LMEMCPY_6_PAD .Lmemcpy_6: and r2, r1, #0x03 orr r2, r2, r0, lsl #2 ands r2, r2, #0x0f sub r3, pc, #0x14 addne pc, r3, r2, lsl #LMEMCPY_6_LOG2 /* * 0000: dst is 32-bit aligned, src is 32-bit aligned */ ldr r2, [r1] ldrh r3, [r1, #0x04] str r2, [r0] strh r3, [r0, #0x04] RET LMEMCPY_6_PAD /* * 0001: dst is 32-bit aligned, src is 8-bit aligned (byte 1) */ ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ ldr r3, [r1, #0x03] /* BE:r3 = 345x LE:r3 = x543 */ #ifdef __ARMEB__ mov r2, r2, lsl #8 /* r2 = 012. */ orr r2, r2, r3, lsr #24 /* r2 = 0123 */ #else mov r2, r2, lsr #8 /* r2 = .210 */ orr r2, r2, r3, lsl #24 /* r2 = 3210 */ #endif mov r3, r3, lsr #8 /* BE:r3 = .345 LE:r3 = .x54 */ str r2, [r0] strh r3, [r0, #0x04] RET LMEMCPY_6_PAD /* * 0010: dst is 32-bit aligned, src is 16-bit aligned */ ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ #ifdef __ARMEB__ mov r1, r3, lsr #16 /* r1 = ..23 */ orr r1, r1, r2, lsl #16 /* r1 = 0123 */ str r1, [r0] strh r3, [r0, #0x04] #else mov r1, r3, lsr #16 /* r1 = ..54 */ orr r2, r2, r3, lsl #16 /* r2 = 3210 */ str r2, [r0] strh r1, [r0, #0x04] #endif RET LMEMCPY_6_PAD /* * 0011: dst is 32-bit aligned, src is 8-bit aligned (byte 3) */ ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */ ldr r3, [r1, #1] /* BE:r3 = 1234 LE:r3 = 4321 */ ldr r1, [r1, #5] /* BE:r1 = 5xxx LE:r1 = xxx5 */ #ifdef __ARMEB__ mov r2, r2, lsl #24 /* r2 = 0... */ orr r2, r2, r3, lsr #8 /* r2 = 0123 */ mov r3, r3, lsl #8 /* r3 = 234. */ orr r1, r3, r1, lsr #24 /* r1 = 2345 */ #else mov r2, r2, lsr #24 /* r2 = ...0 */ orr r2, r2, r3, lsl #8 /* r2 = 3210 */ mov r1, r1, lsl #8 /* r1 = xx5. 
*/ orr r1, r1, r3, lsr #24 /* r1 = xx54 */ #endif str r2, [r0] strh r1, [r0, #0x04] RET LMEMCPY_6_PAD /* * 0100: dst is 8-bit aligned, src is 32-bit aligned */ ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */ ldrh r2, [r1, #0x04] /* BE:r2 = ..45 LE:r2 = ..54 */ mov r1, r3, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */ strh r1, [r0, #0x01] #ifdef __ARMEB__ mov r1, r3, lsr #24 /* r1 = ...0 */ strb r1, [r0] mov r3, r3, lsl #8 /* r3 = 123. */ orr r3, r3, r2, lsr #8 /* r3 = 1234 */ #else strb r3, [r0] mov r3, r3, lsr #24 /* r3 = ...3 */ orr r3, r3, r2, lsl #8 /* r3 = .543 */ mov r2, r2, lsr #8 /* r2 = ...5 */ #endif strh r3, [r0, #0x03] strb r2, [r0, #0x05] RET LMEMCPY_6_PAD /* * 0101: dst is 8-bit aligned, src is 8-bit aligned */ ldrb r2, [r1] ldrh r3, [r1, #0x01] ldrh ip, [r1, #0x03] ldrb r1, [r1, #0x05] strb r2, [r0] strh r3, [r0, #0x01] strh ip, [r0, #0x03] strb r1, [r0, #0x05] RET LMEMCPY_6_PAD /* * 0110: dst is 8-bit aligned, src is 16-bit aligned */ ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */ #ifdef __ARMEB__ mov r3, r2, lsr #8 /* r3 = ...0 */ strb r3, [r0] strb r1, [r0, #0x05] mov r3, r1, lsr #8 /* r3 = .234 */ strh r3, [r0, #0x03] mov r3, r2, lsl #8 /* r3 = .01. 
*/ orr r3, r3, r1, lsr #24 /* r3 = .012 */ strh r3, [r0, #0x01] #else strb r2, [r0] mov r3, r1, lsr #24 strb r3, [r0, #0x05] mov r3, r1, lsr #8 /* r3 = .543 */ strh r3, [r0, #0x03] mov r3, r2, lsr #8 /* r3 = ...1 */ orr r3, r3, r1, lsl #8 /* r3 = 4321 */ strh r3, [r0, #0x01] #endif RET LMEMCPY_6_PAD /* * 0111: dst is 8-bit aligned, src is 8-bit aligned */ ldrb r2, [r1] ldrh r3, [r1, #0x01] ldrh ip, [r1, #0x03] ldrb r1, [r1, #0x05] strb r2, [r0] strh r3, [r0, #0x01] strh ip, [r0, #0x03] strb r1, [r0, #0x05] RET LMEMCPY_6_PAD /* * 1000: dst is 16-bit aligned, src is 32-bit aligned */ #ifdef __ARMEB__ ldr r2, [r1] /* r2 = 0123 */ ldrh r3, [r1, #0x04] /* r3 = ..45 */ mov r1, r2, lsr #16 /* r1 = ..01 */ orr r3, r3, r2, lsl#16 /* r3 = 2345 */ strh r1, [r0] str r3, [r0, #0x02] #else ldrh r2, [r1, #0x04] /* r2 = ..54 */ ldr r3, [r1] /* r3 = 3210 */ mov r2, r2, lsl #16 /* r2 = 54.. */ orr r2, r2, r3, lsr #16 /* r2 = 5432 */ strh r3, [r0] str r2, [r0, #0x02] #endif RET LMEMCPY_6_PAD /* * 1001: dst is 16-bit aligned, src is 8-bit aligned */ ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ ldr r2, [r1, #3] /* BE:r2 = 345x LE:r2 = x543 */ mov r1, r3, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ #ifdef __ARMEB__ mov r2, r2, lsr #8 /* r2 = .345 */ orr r2, r2, r3, lsl #24 /* r2 = 2345 */ #else mov r2, r2, lsl #8 /* r2 = 543. */ orr r2, r2, r3, lsr #24 /* r2 = 5432 */ #endif strh r1, [r0] str r2, [r0, #0x02] RET LMEMCPY_6_PAD /* * 1010: dst is 16-bit aligned, src is 16-bit aligned */ ldrh r2, [r1] ldr r3, [r1, #0x02] strh r2, [r0] str r3, [r0, #0x02] RET LMEMCPY_6_PAD /* * 1011: dst is 16-bit aligned, src is 8-bit aligned */ ldrb r3, [r1] /* r3 = ...0 */ ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ ldrb r1, [r1, #0x05] /* r1 = ...5 */ #ifdef __ARMEB__ mov r3, r3, lsl #8 /* r3 = ..0. */ orr r3, r3, r2, lsr #24 /* r3 = ..01 */ orr r1, r1, r2, lsl #8 /* r1 = 2345 */ #else orr r3, r3, r2, lsl #8 /* r3 = 3210 */ mov r1, r1, lsl #24 /* r1 = 5... 
*/ orr r1, r1, r2, lsr #8 /* r1 = 5432 */ #endif strh r3, [r0] str r1, [r0, #0x02] RET LMEMCPY_6_PAD /* * 1100: dst is 8-bit aligned, src is 32-bit aligned */ ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ ldrh r1, [r1, #0x04] /* BE:r1 = ..45 LE:r1 = ..54 */ #ifdef __ARMEB__ mov r3, r2, lsr #24 /* r3 = ...0 */ strb r3, [r0] mov r2, r2, lsl #8 /* r2 = 123. */ orr r2, r2, r1, lsr #8 /* r2 = 1234 */ #else strb r2, [r0] mov r2, r2, lsr #8 /* r2 = .321 */ orr r2, r2, r1, lsl #24 /* r2 = 4321 */ mov r1, r1, lsr #8 /* r1 = ...5 */ #endif str r2, [r0, #0x01] strb r1, [r0, #0x05] RET LMEMCPY_6_PAD /* * 1101: dst is 8-bit aligned, src is 8-bit aligned */ ldrb r2, [r1] ldrh r3, [r1, #0x01] ldrh ip, [r1, #0x03] ldrb r1, [r1, #0x05] strb r2, [r0] strh r3, [r0, #0x01] strh ip, [r0, #0x03] strb r1, [r0, #0x05] RET LMEMCPY_6_PAD /* * 1110: dst is 8-bit aligned, src is 16-bit aligned */ ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */ #ifdef __ARMEB__ mov r3, r2, lsr #8 /* r3 = ...0 */ strb r3, [r0] mov r2, r2, lsl #24 /* r2 = 1... 
*/ orr r2, r2, r1, lsr #8 /* r2 = 1234 */ #else strb r2, [r0] mov r2, r2, lsr #8 /* r2 = ...1 */ orr r2, r2, r1, lsl #8 /* r2 = 4321 */ mov r1, r1, lsr #24 /* r1 = ...5 */ #endif str r2, [r0, #0x01] strb r1, [r0, #0x05] RET LMEMCPY_6_PAD /* * 1111: dst is 8-bit aligned, src is 8-bit aligned */ ldrb r2, [r1] ldr r3, [r1, #0x01] ldrb r1, [r1, #0x05] strb r2, [r0] str r3, [r0, #0x01] strb r1, [r0, #0x05] RET LMEMCPY_6_PAD /****************************************************************************** * Special case for 8 byte copies */ #define LMEMCPY_8_LOG2 6 /* 64 bytes */ #define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2 LMEMCPY_8_PAD .Lmemcpy_8: and r2, r1, #0x03 orr r2, r2, r0, lsl #2 ands r2, r2, #0x0f sub r3, pc, #0x14 addne pc, r3, r2, lsl #LMEMCPY_8_LOG2 /* * 0000: dst is 32-bit aligned, src is 32-bit aligned */ ldr r2, [r1] ldr r3, [r1, #0x04] str r2, [r0] str r3, [r0, #0x04] RET LMEMCPY_8_PAD /* * 0001: dst is 32-bit aligned, src is 8-bit aligned */ ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ ldr r2, [r1, #0x03] /* BE:r2 = 3456 LE:r2 = 6543 */ ldrb r1, [r1, #0x07] /* r1 = ...7 */ #ifdef __ARMEB__ mov r3, r3, lsl #8 /* r3 = 012. */ orr r3, r3, r2, lsr #24 /* r3 = 0123 */ orr r2, r1, r2, lsl #8 /* r2 = 4567 */ #else mov r3, r3, lsr #8 /* r3 = .210 */ orr r3, r3, r2, lsl #24 /* r3 = 3210 */ mov r1, r1, lsl #24 /* r1 = 7... */ orr r2, r1, r2, lsr #8 /* r2 = 7654 */ #endif str r3, [r0] str r2, [r0, #0x04] RET LMEMCPY_8_PAD /* * 0010: dst is 32-bit aligned, src is 16-bit aligned */ ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ #ifdef __ARMEB__ mov r2, r2, lsl #16 /* r2 = 01.. 
*/ orr r2, r2, r3, lsr #16 /* r2 = 0123 */ orr r3, r1, r3, lsl #16 /* r3 = 4567 */ #else orr r2, r2, r3, lsl #16 /* r2 = 3210 */ mov r3, r3, lsr #16 /* r3 = ..54 */ orr r3, r3, r1, lsl #16 /* r3 = 7654 */ #endif str r2, [r0] str r3, [r0, #0x04] RET LMEMCPY_8_PAD /* * 0011: dst is 32-bit aligned, src is 8-bit aligned */ ldrb r3, [r1] /* r3 = ...0 */ ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 */ #ifdef __ARMEB__ mov r3, r3, lsl #24 /* r3 = 0... */ orr r3, r3, r2, lsr #8 /* r3 = 0123 */ mov r2, r2, lsl #24 /* r2 = 4... */ orr r2, r2, r1, lsr #8 /* r2 = 4567 */ #else orr r3, r3, r2, lsl #8 /* r3 = 3210 */ mov r2, r2, lsr #24 /* r2 = ...4 */ orr r2, r2, r1, lsl #8 /* r2 = 7654 */ #endif str r3, [r0] str r2, [r0, #0x04] RET LMEMCPY_8_PAD /* * 0100: dst is 8-bit aligned, src is 32-bit aligned */ ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */ ldr r2, [r1, #0x04] /* BE:r2 = 4567 LE:r2 = 7654 */ #ifdef __ARMEB__ mov r1, r3, lsr #24 /* r1 = ...0 */ strb r1, [r0] mov r1, r3, lsr #8 /* r1 = .012 */ strb r2, [r0, #0x07] mov r3, r3, lsl #24 /* r3 = 3... */ orr r3, r3, r2, lsr #8 /* r3 = 3456 */ #else strb r3, [r0] mov r1, r2, lsr #24 /* r1 = ...7 */ strb r1, [r0, #0x07] mov r1, r3, lsr #8 /* r1 = .321 */ mov r3, r3, lsr #24 /* r3 = ...3 */ orr r3, r3, r2, lsl #8 /* r3 = 6543 */ #endif strh r1, [r0, #0x01] str r3, [r0, #0x03] RET LMEMCPY_8_PAD /* * 0101: dst is 8-bit aligned, src is 8-bit aligned */ ldrb r2, [r1] ldrh r3, [r1, #0x01] ldr ip, [r1, #0x03] ldrb r1, [r1, #0x07] strb r2, [r0] strh r3, [r0, #0x01] str ip, [r0, #0x03] strb r1, [r0, #0x07] RET LMEMCPY_8_PAD /* * 0110: dst is 8-bit aligned, src is 16-bit aligned */ ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ #ifdef __ARMEB__ mov ip, r2, lsr #8 /* ip = ...0 */ strb ip, [r0] mov ip, r2, lsl #8 /* ip = .01. 
*/ orr ip, ip, r3, lsr #24 /* ip = .012 */ strb r1, [r0, #0x07] mov r3, r3, lsl #8 /* r3 = 345. */ orr r3, r3, r1, lsr #8 /* r3 = 3456 */ #else strb r2, [r0] /* 0 */ mov ip, r1, lsr #8 /* ip = ...7 */ strb ip, [r0, #0x07] /* 7 */ mov ip, r2, lsr #8 /* ip = ...1 */ orr ip, ip, r3, lsl #8 /* ip = 4321 */ mov r3, r3, lsr #8 /* r3 = .543 */ orr r3, r3, r1, lsl #24 /* r3 = 6543 */ #endif strh ip, [r0, #0x01] str r3, [r0, #0x03] RET LMEMCPY_8_PAD /* * 0111: dst is 8-bit aligned, src is 8-bit aligned */ ldrb r3, [r1] /* r3 = ...0 */ ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */ ldrh r2, [r1, #0x05] /* BE:r2 = ..56 LE:r2 = ..65 */ ldrb r1, [r1, #0x07] /* r1 = ...7 */ strb r3, [r0] mov r3, ip, lsr #16 /* BE:r3 = ..12 LE:r3 = ..43 */ #ifdef __ARMEB__ strh r3, [r0, #0x01] orr r2, r2, ip, lsl #16 /* r2 = 3456 */ #else strh ip, [r0, #0x01] orr r2, r3, r2, lsl #16 /* r2 = 6543 */ #endif str r2, [r0, #0x03] strb r1, [r0, #0x07] RET LMEMCPY_8_PAD /* * 1000: dst is 16-bit aligned, src is 32-bit aligned */ ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ mov r1, r2, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */ #ifdef __ARMEB__ strh r1, [r0] mov r1, r3, lsr #16 /* r1 = ..45 */ orr r2, r1 ,r2, lsl #16 /* r2 = 2345 */ #else strh r2, [r0] orr r2, r1, r3, lsl #16 /* r2 = 5432 */ mov r3, r3, lsr #16 /* r3 = ..76 */ #endif str r2, [r0, #0x02] strh r3, [r0, #0x06] RET LMEMCPY_8_PAD /* * 1001: dst is 16-bit aligned, src is 8-bit aligned */ ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ ldrb ip, [r1, #0x07] /* ip = ...7 */ mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ strh r1, [r0] #ifdef __ARMEB__ mov r1, r2, lsl #24 /* r1 = 2... 
*/ orr r1, r1, r3, lsr #8 /* r1 = 2345 */ orr r3, ip, r3, lsl #8 /* r3 = 4567 */ #else mov r1, r2, lsr #24 /* r1 = ...2 */ orr r1, r1, r3, lsl #8 /* r1 = 5432 */ mov r3, r3, lsr #24 /* r3 = ...6 */ orr r3, r3, ip, lsl #8 /* r3 = ..76 */ #endif str r1, [r0, #0x02] strh r3, [r0, #0x06] RET LMEMCPY_8_PAD /* * 1010: dst is 16-bit aligned, src is 16-bit aligned */ ldrh r2, [r1] ldr ip, [r1, #0x02] ldrh r3, [r1, #0x06] strh r2, [r0] str ip, [r0, #0x02] strh r3, [r0, #0x06] RET LMEMCPY_8_PAD /* * 1011: dst is 16-bit aligned, src is 8-bit aligned */ ldr r3, [r1, #0x05] /* BE:r3 = 567x LE:r3 = x765 */ ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ ldrb ip, [r1] /* ip = ...0 */ mov r1, r3, lsr #8 /* BE:r1 = .567 LE:r1 = .x76 */ strh r1, [r0, #0x06] #ifdef __ARMEB__ mov r3, r3, lsr #24 /* r3 = ...5 */ orr r3, r3, r2, lsl #8 /* r3 = 2345 */ mov r2, r2, lsr #24 /* r2 = ...1 */ orr r2, r2, ip, lsl #8 /* r2 = ..01 */ #else mov r3, r3, lsl #24 /* r3 = 5... */ orr r3, r3, r2, lsr #8 /* r3 = 5432 */ orr r2, ip, r2, lsl #8 /* r2 = 3210 */ #endif str r3, [r0, #0x02] strh r2, [r0] RET LMEMCPY_8_PAD /* * 1100: dst is 8-bit aligned, src is 32-bit aligned */ ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ mov r1, r3, lsr #8 /* BE:r1 = .456 LE:r1 = .765 */ strh r1, [r0, #0x05] #ifdef __ARMEB__ strb r3, [r0, #0x07] mov r1, r2, lsr #24 /* r1 = ...0 */ strb r1, [r0] mov r2, r2, lsl #8 /* r2 = 123. 
*/ orr r2, r2, r3, lsr #24 /* r2 = 1234 */ str r2, [r0, #0x01] #else strb r2, [r0] mov r1, r3, lsr #24 /* r1 = ...7 */ strb r1, [r0, #0x07] mov r2, r2, lsr #8 /* r2 = .321 */ orr r2, r2, r3, lsl #24 /* r2 = 4321 */ str r2, [r0, #0x01] #endif RET LMEMCPY_8_PAD /* * 1101: dst is 8-bit aligned, src is 8-bit aligned */ ldrb r3, [r1] /* r3 = ...0 */ ldrh r2, [r1, #0x01] /* BE:r2 = ..12 LE:r2 = ..21 */ ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */ ldrb r1, [r1, #0x07] /* r1 = ...7 */ strb r3, [r0] mov r3, ip, lsr #16 /* BE:r3 = ..34 LE:r3 = ..65 */ #ifdef __ARMEB__ strh ip, [r0, #0x05] orr r2, r3, r2, lsl #16 /* r2 = 1234 */ #else strh r3, [r0, #0x05] orr r2, r2, ip, lsl #16 /* r2 = 4321 */ #endif str r2, [r0, #0x01] strb r1, [r0, #0x07] RET LMEMCPY_8_PAD /* * 1110: dst is 8-bit aligned, src is 16-bit aligned */ ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ #ifdef __ARMEB__ mov ip, r2, lsr #8 /* ip = ...0 */ strb ip, [r0] mov ip, r2, lsl #24 /* ip = 1... */ orr ip, ip, r3, lsr #8 /* ip = 1234 */ strb r1, [r0, #0x07] mov r1, r1, lsr #8 /* r1 = ...6 */ orr r1, r1, r3, lsl #8 /* r1 = 3456 */ #else strb r2, [r0] mov ip, r2, lsr #8 /* ip = ...1 */ orr ip, ip, r3, lsl #8 /* ip = 4321 */ mov r2, r1, lsr #8 /* r2 = ...7 */ strb r2, [r0, #0x07] mov r1, r1, lsl #8 /* r1 = .76. 
*/ orr r1, r1, r3, lsr #24 /* r1 = .765 */ #endif str ip, [r0, #0x01] strh r1, [r0, #0x05] RET LMEMCPY_8_PAD /* * 1111: dst is 8-bit aligned, src is 8-bit aligned */ ldrb r2, [r1] ldr ip, [r1, #0x01] ldrh r3, [r1, #0x05] ldrb r1, [r1, #0x07] strb r2, [r0] str ip, [r0, #0x01] strh r3, [r0, #0x05] strb r1, [r0, #0x07] RET LMEMCPY_8_PAD /****************************************************************************** * Special case for 12 byte copies */ #define LMEMCPY_C_LOG2 7 /* 128 bytes */ #define LMEMCPY_C_PAD .align LMEMCPY_C_LOG2 LMEMCPY_C_PAD .Lmemcpy_c: and r2, r1, #0x03 orr r2, r2, r0, lsl #2 ands r2, r2, #0x0f sub r3, pc, #0x14 addne pc, r3, r2, lsl #LMEMCPY_C_LOG2 /* * 0000: dst is 32-bit aligned, src is 32-bit aligned */ ldr r2, [r1] ldr r3, [r1, #0x04] ldr r1, [r1, #0x08] str r2, [r0] str r3, [r0, #0x04] str r1, [r0, #0x08] RET LMEMCPY_C_PAD /* * 0001: dst is 32-bit aligned, src is 8-bit aligned */ ldrb r2, [r1, #0xb] /* r2 = ...B */ ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */ ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */ #ifdef __ARMEB__ orr r2, r2, ip, lsl #8 /* r2 = 89AB */ str r2, [r0, #0x08] mov r2, ip, lsr #24 /* r2 = ...7 */ orr r2, r2, r3, lsl #8 /* r2 = 4567 */ mov r1, r1, lsl #8 /* r1 = 012. */ orr r1, r1, r3, lsr #24 /* r1 = 0123 */ #else mov r2, r2, lsl #24 /* r2 = B... */ orr r2, r2, ip, lsr #8 /* r2 = BA98 */ str r2, [r0, #0x08] mov r2, ip, lsl #24 /* r2 = 7... */ orr r2, r2, r3, lsr #8 /* r2 = 7654 */ mov r1, r1, lsr #8 /* r1 = .210 */ orr r1, r1, r3, lsl #24 /* r1 = 3210 */ #endif str r2, [r0, #0x04] str r1, [r0] RET LMEMCPY_C_PAD /* * 0010: dst is 32-bit aligned, src is 16-bit aligned */ ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */ ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */ #ifdef __ARMEB__ mov r2, r2, lsl #16 /* r2 = 01.. 
*/ orr r2, r2, r3, lsr #16 /* r2 = 0123 */ str r2, [r0] mov r3, r3, lsl #16 /* r3 = 45.. */ orr r3, r3, ip, lsr #16 /* r3 = 4567 */ orr r1, r1, ip, lsl #16 /* r1 = 89AB */ #else orr r2, r2, r3, lsl #16 /* r2 = 3210 */ str r2, [r0] mov r3, r3, lsr #16 /* r3 = ..54 */ orr r3, r3, ip, lsl #16 /* r3 = 7654 */ mov r1, r1, lsl #16 /* r1 = BA.. */ orr r1, r1, ip, lsr #16 /* r1 = BA98 */ #endif str r3, [r0, #0x04] str r1, [r0, #0x08] RET LMEMCPY_C_PAD /* * 0011: dst is 32-bit aligned, src is 8-bit aligned (byte 3) */ ldrb r2, [r1] /* r2 = ...0 */ ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */ ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */ ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */ #ifdef __ARMEB__ mov r2, r2, lsl #24 /* r2 = 0... */ orr r2, r2, r3, lsr #8 /* r2 = 0123 */ str r2, [r0] mov r3, r3, lsl #24 /* r3 = 4... */ orr r3, r3, ip, lsr #8 /* r3 = 4567 */ mov r1, r1, lsr #8 /* r1 = .9AB */ orr r1, r1, ip, lsl #24 /* r1 = 89AB */ #else orr r2, r2, r3, lsl #8 /* r2 = 3210 */ str r2, [r0] mov r3, r3, lsr #24 /* r3 = ...4 */ orr r3, r3, ip, lsl #8 /* r3 = 7654 */ mov r1, r1, lsl #8 /* r1 = BA9. */ orr r1, r1, ip, lsr #24 /* r1 = BA98 */ #endif str r3, [r0, #0x04] str r1, [r0, #0x08] RET LMEMCPY_C_PAD /* * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned */ ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ ldr ip, [r1, #0x08] /* BE:ip = 89AB LE:ip = BA98 */ mov r1, r2, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */ strh r1, [r0, #0x01] #ifdef __ARMEB__ mov r1, r2, lsr #24 /* r1 = ...0 */ strb r1, [r0] mov r1, r2, lsl #24 /* r1 = 3... */ orr r2, r1, r3, lsr #8 /* r2 = 3456 */ mov r1, r3, lsl #24 /* r1 = 7... 
*/ orr r1, r1, ip, lsr #8 /* r1 = 789A */ #else strb r2, [r0] mov r1, r2, lsr #24 /* r1 = ...3 */ orr r2, r1, r3, lsl #8 /* r2 = 6543 */ mov r1, r3, lsr #24 /* r1 = ...7 */ orr r1, r1, ip, lsl #8 /* r1 = A987 */ mov ip, ip, lsr #24 /* ip = ...B */ #endif str r2, [r0, #0x03] str r1, [r0, #0x07] strb ip, [r0, #0x0b] RET LMEMCPY_C_PAD /* * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1) */ ldrb r2, [r1] ldrh r3, [r1, #0x01] ldr ip, [r1, #0x03] strb r2, [r0] ldr r2, [r1, #0x07] ldrb r1, [r1, #0x0b] strh r3, [r0, #0x01] str ip, [r0, #0x03] str r2, [r0, #0x07] strb r1, [r0, #0x0b] RET LMEMCPY_C_PAD /* * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned */ ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */ ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */ #ifdef __ARMEB__ mov r2, r2, ror #8 /* r2 = 1..0 */ strb r2, [r0] mov r2, r2, lsr #16 /* r2 = ..1. */ orr r2, r2, r3, lsr #24 /* r2 = ..12 */ strh r2, [r0, #0x01] mov r2, r3, lsl #8 /* r2 = 345. */ orr r3, r2, ip, lsr #24 /* r3 = 3456 */ mov r2, ip, lsl #8 /* r2 = 789. */ orr r2, r2, r1, lsr #8 /* r2 = 789A */ #else strb r2, [r0] mov r2, r2, lsr #8 /* r2 = ...1 */ orr r2, r2, r3, lsl #8 /* r2 = 4321 */ strh r2, [r0, #0x01] mov r2, r3, lsr #8 /* r2 = .543 */ orr r3, r2, ip, lsl #24 /* r3 = 6543 */ mov r2, ip, lsr #8 /* r2 = .987 */ orr r2, r2, r1, lsl #24 /* r2 = A987 */ mov r1, r1, lsr #8 /* r1 = ...B */ #endif str r3, [r0, #0x03] str r2, [r0, #0x07] strb r1, [r0, #0x0b] RET LMEMCPY_C_PAD /* * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3) */ ldrb r2, [r1] ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */ ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */ ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */ strb r2, [r0] #ifdef __ARMEB__ mov r2, r3, lsr #16 /* r2 = ..12 */ strh r2, [r0, #0x01] mov r3, r3, lsl #16 /* r3 = 34.. 
*/ orr r3, r3, ip, lsr #16 /* r3 = 3456 */ mov ip, ip, lsl #16 /* ip = 78.. */ orr ip, ip, r1, lsr #16 /* ip = 789A */ mov r1, r1, lsr #8 /* r1 = .9AB */ #else strh r3, [r0, #0x01] mov r3, r3, lsr #16 /* r3 = ..43 */ orr r3, r3, ip, lsl #16 /* r3 = 6543 */ mov ip, ip, lsr #16 /* ip = ..87 */ orr ip, ip, r1, lsl #16 /* ip = A987 */ mov r1, r1, lsr #16 /* r1 = ..xB */ #endif str r3, [r0, #0x03] str ip, [r0, #0x07] strb r1, [r0, #0x0b] RET LMEMCPY_C_PAD /* * 1000: dst is 16-bit aligned, src is 32-bit aligned */ ldr ip, [r1] /* BE:ip = 0123 LE:ip = 3210 */ ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ ldr r2, [r1, #0x08] /* BE:r2 = 89AB LE:r2 = BA98 */ mov r1, ip, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */ #ifdef __ARMEB__ strh r1, [r0] mov r1, ip, lsl #16 /* r1 = 23.. */ orr r1, r1, r3, lsr #16 /* r1 = 2345 */ mov r3, r3, lsl #16 /* r3 = 67.. */ orr r3, r3, r2, lsr #16 /* r3 = 6789 */ #else strh ip, [r0] orr r1, r1, r3, lsl #16 /* r1 = 5432 */ mov r3, r3, lsr #16 /* r3 = ..76 */ orr r3, r3, r2, lsl #16 /* r3 = 9876 */ mov r2, r2, lsr #16 /* r2 = ..BA */ #endif str r1, [r0, #0x02] str r3, [r0, #0x06] strh r2, [r0, #0x0a] RET LMEMCPY_C_PAD /* * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1) */ ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ mov ip, r2, lsr #8 /* BE:ip = .x01 LE:ip = .210 */ strh ip, [r0] ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */ ldrb r1, [r1, #0x0b] /* r1 = ...B */ #ifdef __ARMEB__ mov r2, r2, lsl #24 /* r2 = 2... */ orr r2, r2, r3, lsr #8 /* r2 = 2345 */ mov r3, r3, lsl #24 /* r3 = 6... */ orr r3, r3, ip, lsr #8 /* r3 = 6789 */ orr r1, r1, ip, lsl #8 /* r1 = 89AB */ #else mov r2, r2, lsr #24 /* r2 = ...2 */ orr r2, r2, r3, lsl #8 /* r2 = 5432 */ mov r3, r3, lsr #24 /* r3 = ...6 */ orr r3, r3, ip, lsl #8 /* r3 = 9876 */ mov r1, r1, lsl #8 /* r1 = ..B. 
*/ orr r1, r1, ip, lsr #24 /* r1 = ..BA */ #endif str r2, [r0, #0x02] str r3, [r0, #0x06] strh r1, [r0, #0x0a] RET LMEMCPY_C_PAD /* * 1010: dst is 16-bit aligned, src is 16-bit aligned */ ldrh r2, [r1] ldr r3, [r1, #0x02] ldr ip, [r1, #0x06] ldrh r1, [r1, #0x0a] strh r2, [r0] str r3, [r0, #0x02] str ip, [r0, #0x06] strh r1, [r0, #0x0a] RET LMEMCPY_C_PAD /* * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3) */ ldr r2, [r1, #0x09] /* BE:r2 = 9ABx LE:r2 = xBA9 */ ldr r3, [r1, #0x05] /* BE:r3 = 5678 LE:r3 = 8765 */ mov ip, r2, lsr #8 /* BE:ip = .9AB LE:ip = .xBA */ strh ip, [r0, #0x0a] ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */ ldrb r1, [r1] /* r1 = ...0 */ #ifdef __ARMEB__ mov r2, r2, lsr #24 /* r2 = ...9 */ orr r2, r2, r3, lsl #8 /* r2 = 6789 */ mov r3, r3, lsr #24 /* r3 = ...5 */ orr r3, r3, ip, lsl #8 /* r3 = 2345 */ mov r1, r1, lsl #8 /* r1 = ..0. */ orr r1, r1, ip, lsr #24 /* r1 = ..01 */ #else mov r2, r2, lsl #24 /* r2 = 9... */ orr r2, r2, r3, lsr #8 /* r2 = 9876 */ mov r3, r3, lsl #24 /* r3 = 5... */ orr r3, r3, ip, lsr #8 /* r3 = 5432 */ orr r1, r1, ip, lsl #8 /* r1 = 3210 */ #endif str r2, [r0, #0x06] str r3, [r0, #0x02] strh r1, [r0] RET LMEMCPY_C_PAD /* * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned */ ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ ldr ip, [r1, #0x04] /* BE:ip = 4567 LE:ip = 7654 */ ldr r1, [r1, #0x08] /* BE:r1 = 89AB LE:r1 = BA98 */ #ifdef __ARMEB__ mov r3, r2, lsr #24 /* r3 = ...0 */ strb r3, [r0] mov r2, r2, lsl #8 /* r2 = 123. */ orr r2, r2, ip, lsr #24 /* r2 = 1234 */ str r2, [r0, #0x01] mov r2, ip, lsl #8 /* r2 = 567. 
*/ orr r2, r2, r1, lsr #24 /* r2 = 5678 */ str r2, [r0, #0x05] mov r2, r1, lsr #8 /* r2 = ..9A */ strh r2, [r0, #0x09] strb r1, [r0, #0x0b] #else strb r2, [r0] mov r3, r2, lsr #8 /* r3 = .321 */ orr r3, r3, ip, lsl #24 /* r3 = 4321 */ str r3, [r0, #0x01] mov r3, ip, lsr #8 /* r3 = .765 */ orr r3, r3, r1, lsl #24 /* r3 = 8765 */ str r3, [r0, #0x05] mov r1, r1, lsr #8 /* r1 = .BA9 */ strh r1, [r0, #0x09] mov r1, r1, lsr #16 /* r1 = ...B */ strb r1, [r0, #0x0b] #endif RET LMEMCPY_C_PAD /* * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1) */ ldrb r2, [r1, #0x0b] /* r2 = ...B */ ldr r3, [r1, #0x07] /* BE:r3 = 789A LE:r3 = A987 */ ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */ ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */ strb r2, [r0, #0x0b] #ifdef __ARMEB__ strh r3, [r0, #0x09] mov r3, r3, lsr #16 /* r3 = ..78 */ orr r3, r3, ip, lsl #16 /* r3 = 5678 */ mov ip, ip, lsr #16 /* ip = ..34 */ orr ip, ip, r1, lsl #16 /* ip = 1234 */ mov r1, r1, lsr #16 /* r1 = ..x0 */ #else mov r2, r3, lsr #16 /* r2 = ..A9 */ strh r2, [r0, #0x09] mov r3, r3, lsl #16 /* r3 = 87.. */ orr r3, r3, ip, lsr #16 /* r3 = 8765 */ mov ip, ip, lsl #16 /* ip = 43.. 
*/ orr ip, ip, r1, lsr #16 /* ip = 4321 */ mov r1, r1, lsr #8 /* r1 = .210 */ #endif str r3, [r0, #0x05] str ip, [r0, #0x01] strb r1, [r0] RET LMEMCPY_C_PAD /* * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned */ #ifdef __ARMEB__ ldrh r2, [r1, #0x0a] /* r2 = ..AB */ ldr ip, [r1, #0x06] /* ip = 6789 */ ldr r3, [r1, #0x02] /* r3 = 2345 */ ldrh r1, [r1] /* r1 = ..01 */ strb r2, [r0, #0x0b] mov r2, r2, lsr #8 /* r2 = ...A */ orr r2, r2, ip, lsl #8 /* r2 = 789A */ mov ip, ip, lsr #8 /* ip = .678 */ orr ip, ip, r3, lsl #24 /* ip = 5678 */ mov r3, r3, lsr #8 /* r3 = .234 */ orr r3, r3, r1, lsl #24 /* r3 = 1234 */ mov r1, r1, lsr #8 /* r1 = ...0 */ strb r1, [r0] str r3, [r0, #0x01] str ip, [r0, #0x05] strh r2, [r0, #0x09] #else ldrh r2, [r1] /* r2 = ..10 */ ldr r3, [r1, #0x02] /* r3 = 5432 */ ldr ip, [r1, #0x06] /* ip = 9876 */ ldrh r1, [r1, #0x0a] /* r1 = ..BA */ strb r2, [r0] mov r2, r2, lsr #8 /* r2 = ...1 */ orr r2, r2, r3, lsl #8 /* r2 = 4321 */ mov r3, r3, lsr #24 /* r3 = ...5 */ orr r3, r3, ip, lsl #8 /* r3 = 8765 */ mov ip, ip, lsr #24 /* ip = ...9 */ orr ip, ip, r1, lsl #8 /* ip = .BA9 */ mov r1, r1, lsr #8 /* r1 = ...B */ str r2, [r0, #0x01] str r3, [r0, #0x05] strh ip, [r0, #0x09] strb r1, [r0, #0x0b] #endif RET LMEMCPY_C_PAD /* * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3) */ ldrb r2, [r1] ldr r3, [r1, #0x01] ldr ip, [r1, #0x05] strb r2, [r0] ldrh r2, [r1, #0x09] ldrb r1, [r1, #0x0b] str r3, [r0, #0x01] str ip, [r0, #0x05] strh r2, [r0, #0x09] strb r1, [r0, #0x0b] RET END(memcpy) #endif /* _ARM_ARCH_5E */ #ifdef GPROF ENTRY(user) nop +END(user) ENTRY(btrap) nop +END(btrap) ENTRY(etrap) nop +END(etrap) ENTRY(bintr) nop +END(bintr) ENTRY(eintr) nop - +END(eintr) #endif Index: stable/10/sys/arm/include/asm.h =================================================================== --- stable/10/sys/arm/include/asm.h (revision 269795) +++ stable/10/sys/arm/include/asm.h (revision 269796) @@ -1,195 +1,212 @@ /* $NetBSD: asm.h,v 1.5 
2003/08/07 16:26:53 agc Exp $ */ /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*
* from: @(#)asm.h	5.5 (Berkeley) 5/7/91
*
* $FreeBSD$
*/

#ifndef _MACHINE_ASM_H_
#define _MACHINE_ASM_H_
/* NOTE(review): the header name after the include directive is missing in this copy -- restore it. */
#include

/* Symbol-name wrappers; both expand to the bare name. */
#define _C_LABEL(x)	x
#define _ASM_LABEL(x)	x

#define I32_bit (1 << 7)	/* IRQ disable */
#define F32_bit (1 << 6)	/* FIQ disable */

#define CPU_CONTROL_32BP_ENABLE 0x00000010 /* P: 32-bit exception handlers */
#define CPU_CONTROL_32BD_ENABLE 0x00000020 /* D: 32-bit addressing */

#ifndef _ALIGN_TEXT
# define _ALIGN_TEXT .align 0
#endif

/* Unwind directives are only emitted for EABI builds; otherwise they expand to nothing. */
#ifdef __ARM_EABI__
#define STOP_UNWINDING	.cantunwind
#define _FNSTART	.fnstart
#define _FNEND		.fnend
#else
#define STOP_UNWINDING
#define _FNSTART
#define _FNEND
#endif

/*
 * gas/arm uses @ as a comment character, so @ cannot be used here.
 * Instead, gas recognises # in place of @ in .type directives.
 * We define a couple of macros so that assembly code will not be dependent
 * on one or the other.
 */
#define _ASM_TYPE_FUNCTION	#function
#define _ASM_TYPE_OBJECT	#object
/*
 * Export the symbol x. The parameter name must match the name used in the
 * body, otherwise the argument is ignored and the literal name is exported.
 */
#define GLOBAL(x) .globl x
/* Open a global function: text section, alignment, symbol type, label, unwind start. */
#define _ENTRY(x) \
	.text; _ALIGN_TEXT; .globl x; .type x,_ASM_TYPE_FUNCTION; x: _FNSTART
-
/* Close a function opened with _ENTRY(): record its size and end the unwind region. */
#define _END(x)	.size x, . - x; _FNEND
+/*
+ * EENTRY()/EEND() mark "extra" entry/exit points from a function.
+ * The unwind info cannot handle the concept of a nested function, or a function
+ * with multiple .fnstart directives, but some of our assembler code is written
+ * with multiple labels to allow entry at several points.  The EENTRY() macro
+ * defines such an extra entry point without a new .fnstart, so that it's
+ * basically just a label that you can jump to.  The EEND() macro does nothing
+ * at all, except document the exit point associated with the same-named entry.
+ */
+#define _EENTRY(x) 	.globl x; .type x,_ASM_TYPE_FUNCTION; x:
+#define _EEND(x)	/* nothing */
+
/* With GPROF defined, the profiling prologue saves lr in ip and calls __mcount. */
#ifdef GPROF
#  define _PROF_PROLOGUE	\
	mov ip, lr; bl __mcount
#else
# define _PROF_PROLOGUE
#endif

/* The _NP variants omit the profiling prologue; the AS variants use _ASM_LABEL. */
#define	ENTRY(y)	_ENTRY(_C_LABEL(y)); _PROF_PROLOGUE
+#define	EENTRY(y)	_EENTRY(_C_LABEL(y)); _PROF_PROLOGUE
#define	ENTRY_NP(y)	_ENTRY(_C_LABEL(y))
+#define	EENTRY_NP(y)	_EENTRY(_C_LABEL(y))
#define	END(y)		_END(_C_LABEL(y))
+#define	EEND(y)
#define	ASENTRY(y)	_ENTRY(_ASM_LABEL(y)); _PROF_PROLOGUE
+#define	ASEENTRY(y)	_EENTRY(_ASM_LABEL(y)); _PROF_PROLOGUE
#define	ASENTRY_NP(y)	_ENTRY(_ASM_LABEL(y))
+#define	ASEENTRY_NP(y)	_EENTRY(_ASM_LABEL(y))
#define	ASEND(y)	_END(_ASM_LABEL(y))
+#define	ASEEND(y)

#define	ASMSTR		.asciz

#if defined(PIC)
#define	PLT_SYM(x)	PIC_SYM(x, PLT)
#define	GOT_SYM(x)	PIC_SYM(x, GOT)
#define	GOT_GET(x,got,sym)	\
	ldr	x, sym;		\
	ldr	x, [x, got]
#define	GOT_INIT(got,gotsym,pclabel) \
	ldr	got, gotsym;	\
	add	got, got, pc;	\
	pclabel:
#define	GOT_INITSYM(gotsym,pclabel) \
	gotsym: .word _C_LABEL(_GLOBAL_OFFSET_TABLE_) + (. - (pclabel+4))

#ifdef __STDC__
#define	PIC_SYM(x,y)	x ## ( ## y ## )
#else
#define	PIC_SYM(x,y)	x/**/(/**/y/**/)
#endif

#else
#define	PLT_SYM(x)	x
#define	GOT_SYM(x)	x
#define	GOT_GET(x,got,sym)	\
	ldr	x, sym;
#define	GOT_INIT(got,gotsym,pclabel)
#define	GOT_INITSYM(gotsym,pclabel)
#define	PIC_SYM(x,y)	x
#endif	/* PIC */

#undef __FBSDID
#if !defined(lint) && !defined(STRIP_FBSDID)
#define __FBSDID(s)     .ident s
#else
#define __FBSDID(s)     /* nothing */
#endif

#define	WEAK_ALIAS(alias,sym)						\
	.weak alias;							\
	alias = sym

#ifdef __STDC__
#define	WARN_REFERENCES(sym,msg)					\
	.stabs msg ## ,30,0,0,0 ;					\
	.stabs __STRING(_C_LABEL(sym)) ## ,1,0,0,0
#else
#define	WARN_REFERENCES(sym,msg)					\
	.stabs msg,30,0,0,0 ;						\
	.stabs __STRING(sym),1,0,0,0
#endif	/* __STDC__ */

/* Exactly one of the __ARM_ARCH_*__ macros will be defined by the compiler. */
/* The _ARM_ARCH_* macros are deprecated and will be removed soon. */
/* This should be moved into another header so it can be used in
 * both asm and C code. machine/asm.h cannot be included in C code. */
#if defined (__ARM_ARCH_7__) || defined (__ARM_ARCH_7A__)
#define _ARM_ARCH_7
#define _HAVE_ARMv7_INSTRUCTIONS 1
#endif

#if defined (_HAVE_ARMv7_INSTRUCTIONS) || defined (__ARM_ARCH_6__) || \
	defined (__ARM_ARCH_6J__) || defined (__ARM_ARCH_6K__) || \
	defined (__ARM_ARCH_6Z__) || defined (__ARM_ARCH_6ZK__)
#define _ARM_ARCH_6
#define _HAVE_ARMv6_INSTRUCTIONS 1
#endif

#if defined (_HAVE_ARMv6_INSTRUCTIONS) || defined (__ARM_ARCH_5TE__) || \
	defined (__ARM_ARCH_5TEJ__) || defined (__ARM_ARCH_5E__)
#define _ARM_ARCH_5E
#define _HAVE_ARMv5E_INSTRUCTIONS 1
#endif

#if defined (_HAVE_ARMv5E_INSTRUCTIONS) || defined (__ARM_ARCH_5__) || \
	defined (__ARM_ARCH_5T__)
#define _ARM_ARCH_5
#define _HAVE_ARMv5_INSTRUCTIONS 1
#endif

#if defined (_HAVE_ARMv5_INSTRUCTIONS) || defined (__ARM_ARCH_4T__)
#define _ARM_ARCH_4T
#define _HAVE_ARMv4T_INSTRUCTIONS 1
#endif

/* FreeBSD requires ARMv4, so this is always set. */
#define _HAVE_ARMv4_INSTRUCTIONS 1

/* Function return: bx lr when ARMv4T instructions are available, else mov pc, lr. */
#if defined (_HAVE_ARMv4T_INSTRUCTIONS)
# define RET	bx	lr
# define RETeq	bxeq	lr
# define RETne	bxne	lr
# define RETc(c) bx##c	lr
#else
# define RET	mov	pc, lr
# define RETeq	moveq	pc, lr
# define RETne	movne	pc, lr
# define RETc(c) mov##c	pc, lr
#endif

#endif /* !_MACHINE_ASM_H_ */
Index: stable/10/sys/libkern/arm/divsi3.S
===================================================================
--- stable/10/sys/libkern/arm/divsi3.S	(revision 269795)
+++ stable/10/sys/libkern/arm/divsi3.S	(revision 269796)
@@ -1,408 +1,408 @@
/*	$NetBSD: divsi3.S,v 1.4 2003/04/05 23:27:15 bjh21 Exp $	*/

/*-
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * __umodsi3: calls .L_udivide, then returns in r0 the value left in r1 * (the remainder, per the register comments at .L_udivide). * * The stack is kept aligned because .L_udivide may branch to .L_overflow, * which makes a C call (raise) in the non-kernel, non-standalone build. */ ENTRY_NP(__umodsi3) stmfd sp!, {lr} sub sp, sp, #4 /* align stack */ bl .L_udivide add sp, sp, #4 /* unalign stack */ mov r0, r1 ldmfd sp!, {pc} END(__umodsi3) /* * __modsi3: same wrapper as __umodsi3, but around .L_divide. */ ENTRY_NP(__modsi3) stmfd sp!, {lr} sub sp, sp, #4 /* align stack */ bl .L_divide add sp, sp, #4 /* unalign stack */ mov r0, r1 ldmfd sp!, {pc} .L_overflow: /* reached from the bcc that follows the compare of r0 against 1, i.e. the divisor held in r0 is 0 */ #if !defined(_KERNEL) && !defined(_STANDALONE) mov r0, #8 /* SIGFPE */ bl PIC_SYM(_C_LABEL(raise), PLT) /* raise it */ mov r0, #0 #else /* XXX should cause a fatal error */ mvn r0, #0 #endif RET END(__modsi3) +ENTRY_NP(__udivsi3) #ifdef __ARM_EABI__ -ENTRY_NP(__aeabi_uidiv) -ENTRY_NP(__aeabi_uidivmod) +EENTRY_NP(__aeabi_uidiv) +EENTRY_NP(__aeabi_uidivmod) #endif -ENTRY_NP(__udivsi3) .L_udivide: /* r0 = r0 / r1; r1 = r0 % r1 */ eor r0, r1, r0 eor r1, r0, r1 eor r0, r1, r0 /* r0 = r1 / r0; r1 = r1 % r0 */ cmp r0, #1 bcc .L_overflow beq .L_divide_l0 mov ip, #0 movs r1, r1 bpl .L_divide_l1 orr ip, ip, #0x20000000 /* ip bit 0x20000000 = -ve r1 */ movs r1, r1, lsr #1 orrcs ip, ip, #0x10000000 /* ip bit 0x10000000 = bit 0 of r1 */ b .L_divide_l1 .L_divide_l0: /* r0 == 1 */ mov r0, r1 mov r1, #0 RET #ifdef __ARM_EABI__ -END(__aeabi_uidiv) -END(__aeabi_uidivmod) +EEND(__aeabi_uidiv) +EEND(__aeabi_uidivmod) #endif END(__udivsi3) +ENTRY_NP(__divsi3) #ifdef __ARM_EABI__ -ENTRY_NP(__aeabi_idiv) -ENTRY_NP(__aeabi_idivmod)
+EENTRY_NP(__aeabi_idiv) +EENTRY_NP(__aeabi_idivmod) #endif -ENTRY_NP(__divsi3) .L_divide: /* r0 = r0 / r1; r1 = r0 % r1 */ eor r0, r1, r0 eor r1, r0, r1 eor r0, r1, r0 /* r0 = r1 / r0; r1 = r1 % r0 */ cmp r0, #1 bcc .L_overflow beq .L_divide_l0 ands ip, r0, #0x80000000 rsbmi r0, r0, #0 ands r2, r1, #0x80000000 eor ip, ip, r2 rsbmi r1, r1, #0 orr ip, r2, ip, lsr #1 /* ip bit 0x40000000 = -ve division */ /* ip bit 0x80000000 = -ve remainder */ .L_divide_l1: mov r2, #1 mov r3, #0 /* * If the highest bit of the dividend is set, we have to be * careful when shifting the divisor. Test this. */ movs r1,r1 bpl .L_old_code /* * At this point, the highest bit of r1 is known to be set. * We abuse this below in the tst instructions. */ tst r1, r0 /*, lsl #0 */ bmi .L_divide_b1 tst r1, r0, lsl #1 bmi .L_divide_b2 tst r1, r0, lsl #2 bmi .L_divide_b3 tst r1, r0, lsl #3 bmi .L_divide_b4 tst r1, r0, lsl #4 bmi .L_divide_b5 tst r1, r0, lsl #5 bmi .L_divide_b6 tst r1, r0, lsl #6 bmi .L_divide_b7 tst r1, r0, lsl #7 bmi .L_divide_b8 tst r1, r0, lsl #8 bmi .L_divide_b9 tst r1, r0, lsl #9 bmi .L_divide_b10 tst r1, r0, lsl #10 bmi .L_divide_b11 tst r1, r0, lsl #11 bmi .L_divide_b12 tst r1, r0, lsl #12 bmi .L_divide_b13 tst r1, r0, lsl #13 bmi .L_divide_b14 tst r1, r0, lsl #14 bmi .L_divide_b15 tst r1, r0, lsl #15 bmi .L_divide_b16 tst r1, r0, lsl #16 bmi .L_divide_b17 tst r1, r0, lsl #17 bmi .L_divide_b18 tst r1, r0, lsl #18 bmi .L_divide_b19 tst r1, r0, lsl #19 bmi .L_divide_b20 tst r1, r0, lsl #20 bmi .L_divide_b21 tst r1, r0, lsl #21 bmi .L_divide_b22 tst r1, r0, lsl #22 bmi .L_divide_b23 tst r1, r0, lsl #23 bmi .L_divide_b24 tst r1, r0, lsl #24 bmi .L_divide_b25 tst r1, r0, lsl #25 bmi .L_divide_b26 tst r1, r0, lsl #26 bmi .L_divide_b27 tst r1, r0, lsl #27 bmi .L_divide_b28 tst r1, r0, lsl #28 bmi .L_divide_b29 tst r1, r0, lsl #29 bmi .L_divide_b30 tst r1, r0, lsl #30 bmi .L_divide_b31 /* * instead of: * tst r1, r0, lsl #31 * bmi .L_divide_b32 */ b .L_divide_b32 .L_old_code: cmp r1,
r0 bcc .L_divide_b0 cmp r1, r0, lsl #1 bcc .L_divide_b1 cmp r1, r0, lsl #2 bcc .L_divide_b2 cmp r1, r0, lsl #3 bcc .L_divide_b3 cmp r1, r0, lsl #4 bcc .L_divide_b4 cmp r1, r0, lsl #5 bcc .L_divide_b5 cmp r1, r0, lsl #6 bcc .L_divide_b6 cmp r1, r0, lsl #7 bcc .L_divide_b7 cmp r1, r0, lsl #8 bcc .L_divide_b8 cmp r1, r0, lsl #9 bcc .L_divide_b9 cmp r1, r0, lsl #10 bcc .L_divide_b10 cmp r1, r0, lsl #11 bcc .L_divide_b11 cmp r1, r0, lsl #12 bcc .L_divide_b12 cmp r1, r0, lsl #13 bcc .L_divide_b13 cmp r1, r0, lsl #14 bcc .L_divide_b14 cmp r1, r0, lsl #15 bcc .L_divide_b15 cmp r1, r0, lsl #16 bcc .L_divide_b16 cmp r1, r0, lsl #17 bcc .L_divide_b17 cmp r1, r0, lsl #18 bcc .L_divide_b18 cmp r1, r0, lsl #19 bcc .L_divide_b19 cmp r1, r0, lsl #20 bcc .L_divide_b20 cmp r1, r0, lsl #21 bcc .L_divide_b21 cmp r1, r0, lsl #22 bcc .L_divide_b22 cmp r1, r0, lsl #23 bcc .L_divide_b23 cmp r1, r0, lsl #24 bcc .L_divide_b24 cmp r1, r0, lsl #25 bcc .L_divide_b25 cmp r1, r0, lsl #26 bcc .L_divide_b26 cmp r1, r0, lsl #27 bcc .L_divide_b27 cmp r1, r0, lsl #28 bcc .L_divide_b28 cmp r1, r0, lsl #29 bcc .L_divide_b29 cmp r1, r0, lsl #30 bcc .L_divide_b30 .L_divide_b32: cmp r1, r0, lsl #31 subhs r1, r1,r0, lsl #31 addhs r3, r3,r2, lsl #31 .L_divide_b31: cmp r1, r0, lsl #30 subhs r1, r1,r0, lsl #30 addhs r3, r3,r2, lsl #30 .L_divide_b30: cmp r1, r0, lsl #29 subhs r1, r1,r0, lsl #29 addhs r3, r3,r2, lsl #29 .L_divide_b29: cmp r1, r0, lsl #28 subhs r1, r1,r0, lsl #28 addhs r3, r3,r2, lsl #28 .L_divide_b28: cmp r1, r0, lsl #27 subhs r1, r1,r0, lsl #27 addhs r3, r3,r2, lsl #27 .L_divide_b27: cmp r1, r0, lsl #26 subhs r1, r1,r0, lsl #26 addhs r3, r3,r2, lsl #26 .L_divide_b26: cmp r1, r0, lsl #25 subhs r1, r1,r0, lsl #25 addhs r3, r3,r2, lsl #25 .L_divide_b25: cmp r1, r0, lsl #24 subhs r1, r1,r0, lsl #24 addhs r3, r3,r2, lsl #24 .L_divide_b24: cmp r1, r0, lsl #23 subhs r1, r1,r0, lsl #23 addhs r3, r3,r2, lsl #23 .L_divide_b23: cmp r1, r0, lsl #22 subhs r1, r1,r0, lsl #22 addhs r3, r3,r2, lsl #22
.L_divide_b22: cmp r1, r0, lsl #21 subhs r1, r1,r0, lsl #21 addhs r3, r3,r2, lsl #21 .L_divide_b21: cmp r1, r0, lsl #20 subhs r1, r1,r0, lsl #20 addhs r3, r3,r2, lsl #20 .L_divide_b20: cmp r1, r0, lsl #19 subhs r1, r1,r0, lsl #19 addhs r3, r3,r2, lsl #19 .L_divide_b19: cmp r1, r0, lsl #18 subhs r1, r1,r0, lsl #18 addhs r3, r3,r2, lsl #18 .L_divide_b18: cmp r1, r0, lsl #17 subhs r1, r1,r0, lsl #17 addhs r3, r3,r2, lsl #17 .L_divide_b17: cmp r1, r0, lsl #16 subhs r1, r1,r0, lsl #16 addhs r3, r3,r2, lsl #16 .L_divide_b16: cmp r1, r0, lsl #15 subhs r1, r1,r0, lsl #15 addhs r3, r3,r2, lsl #15 .L_divide_b15: cmp r1, r0, lsl #14 subhs r1, r1,r0, lsl #14 addhs r3, r3,r2, lsl #14 .L_divide_b14: cmp r1, r0, lsl #13 subhs r1, r1,r0, lsl #13 addhs r3, r3,r2, lsl #13 .L_divide_b13: cmp r1, r0, lsl #12 subhs r1, r1,r0, lsl #12 addhs r3, r3,r2, lsl #12 .L_divide_b12: cmp r1, r0, lsl #11 subhs r1, r1,r0, lsl #11 addhs r3, r3,r2, lsl #11 .L_divide_b11: cmp r1, r0, lsl #10 subhs r1, r1,r0, lsl #10 addhs r3, r3,r2, lsl #10 .L_divide_b10: cmp r1, r0, lsl #9 subhs r1, r1,r0, lsl #9 addhs r3, r3,r2, lsl #9 .L_divide_b9: cmp r1, r0, lsl #8 subhs r1, r1,r0, lsl #8 addhs r3, r3,r2, lsl #8 .L_divide_b8: cmp r1, r0, lsl #7 subhs r1, r1,r0, lsl #7 addhs r3, r3,r2, lsl #7 .L_divide_b7: cmp r1, r0, lsl #6 subhs r1, r1,r0, lsl #6 addhs r3, r3,r2, lsl #6 .L_divide_b6: cmp r1, r0, lsl #5 subhs r1, r1,r0, lsl #5 addhs r3, r3,r2, lsl #5 .L_divide_b5: cmp r1, r0, lsl #4 subhs r1, r1,r0, lsl #4 addhs r3, r3,r2, lsl #4 .L_divide_b4: cmp r1, r0, lsl #3 subhs r1, r1,r0, lsl #3 addhs r3, r3,r2, lsl #3 .L_divide_b3: cmp r1, r0, lsl #2 subhs r1, r1,r0, lsl #2 addhs r3, r3,r2, lsl #2 .L_divide_b2: cmp r1, r0, lsl #1 subhs r1, r1,r0, lsl #1 addhs r3, r3,r2, lsl #1 .L_divide_b1: cmp r1, r0 subhs r1, r1, r0 addhs r3, r3, r2 .L_divide_b0: tst ip, #0x20000000 bne .L_udivide_l1 mov r0, r3 cmp ip, #0 rsbmi r1, r1, #0 movs ip, ip, lsl #1 bicmi r0, r0, #0x80000000 /* Fix in case we divided 0x80000000 */ rsbmi r0, r0,
#0 RET .L_udivide_l1: tst ip, #0x10000000 mov r1, r1, lsl #1 orrne r1, r1, #1 mov r3, r3, lsl #1 cmp r1, r0 subhs r1, r1, r0 addhs r3, r3, r2 mov r0, r3 RET #ifdef __ARM_EABI__ -END(__aeabi_idiv) -END(__aeabi_idivmod) +EEND(__aeabi_idiv) +EEND(__aeabi_idivmod) #endif END(__divsi3) Index: stable/10 =================================================================== --- stable/10 (revision 269795) +++ stable/10 (revision 269796) Property changes on: stable/10 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r269390