Index: sys/arm/arm/cpufunc.c =================================================================== --- sys/arm/arm/cpufunc.c +++ sys/arm/arm/cpufunc.c @@ -249,14 +249,25 @@ /* MMU functions */ cpufunc_control, /* control */ - armv7_setttb, /* Setttb */ +#ifdef SMP + armv7_setttb_smp, /* Setttb */ +#else + armv7_setttb_up, /* Setttb */ +#endif /* TLB functions */ - armv7_tlb_flushID, /* tlb_flushID */ - armv7_tlb_flushID_SE, /* tlb_flushID_SE */ - armv7_tlb_flushID, /* tlb_flushD */ - armv7_tlb_flushID_SE, /* tlb_flushD_SE */ +#ifdef SMP + armv7_tlb_flushID_smp, /* tlb_flushID */ + armv7_tlb_flushID_SE_smp, /* tlb_flushID_SE */ + armv7_tlb_flushID_smp, /* tlb_flushD */ + armv7_tlb_flushID_SE_smp, /* tlb_flushD_SE */ +#else + armv7_tlb_flushID_up, /* tlb_flushID */ + armv7_tlb_flushID_SE_up, /* tlb_flushID_SE */ + armv7_tlb_flushID_up, /* tlb_flushD */ + armv7_tlb_flushID_SE_up, /* tlb_flushD_SE */ +#endif /* Cache operations */ armv7_icache_sync_range, /* icache_sync_range */ @@ -267,7 +278,11 @@ armv7_dcache_wb_range, /* dcache_wb_range */ armv7_idcache_inv_all, /* idcache_inv_all */ - armv7_idcache_wbinv_all, /* idcache_wbinv_all */ +#ifdef SMP + armv7_idcache_wbinv_all_smp, /* idcache_wbinv_all */ +#else + armv7_idcache_wbinv_all_up, /* idcache_wbinv_all */ +#endif armv7_idcache_wbinv_range, /* idcache_wbinv_all */ (void *)cpufunc_nullop, /* l2cache_wbinv_all */ @@ -283,7 +298,11 @@ (void *)cpufunc_nullop, /* sleep */ /* Soft functions */ - armv7_context_switch, /* context_switch */ +#ifdef SMP + armv7_context_switch_smp, /* context_switch */ +#else + armv7_context_switch_up, /* context_switch */ +#endif pj4bv7_setup /* cpu setup */ }; @@ -495,7 +514,7 @@ #endif /*CPU_ARM1176 */ #if defined(CPU_CORTEXA) || defined(CPU_KRAIT) -struct cpu_functions cortexa_cpufuncs = { +struct cpu_functions cortexa_up_cpufuncs = { /* CPU functions */ cpufunc_nullop, /* cpwait */ @@ -503,7 +522,7 @@ /* MMU functions */ cpufunc_control, /* control */ - armv7_setttb, /* Setttb */ + armv7_setttb_up, /* Setttb */ /* * TLB functions. ARMv7 does all TLB ops based on a unified TLB model @@ -511,10 +530,10 @@ * same 'ID' functions for all 3 variations. 
*/ - armv7_tlb_flushID, /* tlb_flushID */ - armv7_tlb_flushID_SE, /* tlb_flushID_SE */ - armv7_tlb_flushID, /* tlb_flushD */ - armv7_tlb_flushID_SE, /* tlb_flushD_SE */ + armv7_tlb_flushID_up, /* tlb_flushID */ + armv7_tlb_flushID_SE_up, /* tlb_flushID_SE */ + armv7_tlb_flushID_up, /* tlb_flushD */ + armv7_tlb_flushID_SE_up, /* tlb_flushD_SE */ /* Cache operations */ @@ -526,7 +526,7 @@ armv7_dcache_wb_range, /* dcache_wb_range */ armv7_idcache_inv_all, /* idcache_inv_all */ - armv7_idcache_wbinv_all, /* idcache_wbinv_all */ + armv7_idcache_wbinv_all_up, /* idcache_wbinv_all */ armv7_idcache_wbinv_range, /* idcache_wbinv_range */ /* @@ -547,10 +547,69 @@ /* Soft functions */ - armv7_context_switch, /* context_switch */ + armv7_context_switch_up, /* context_switch */ cortexa_setup /* cpu setup */ }; + +#ifdef SMP +struct cpu_functions cortexa_smp_cpufuncs = { + /* CPU functions */ + + cpufunc_nullop, /* cpwait */ + + /* MMU functions */ + + cpufunc_control, /* control */ + armv7_setttb_smp, /* Setttb */ + + /* + * TLB functions. ARMv7 does all TLB ops based on a unified TLB model + * whether the hardware implements separate I+D or not, so we use the + * same 'ID' functions for all 3 variations. + */ + + armv7_tlb_flushID_smp, /* tlb_flushID */ + armv7_tlb_flushID_SE_smp, /* tlb_flushID_SE */ + armv7_tlb_flushID_smp, /* tlb_flushD */ + armv7_tlb_flushID_SE_smp, /* tlb_flushD_SE */ + + /* Cache operations */ + + armv7_icache_sync_range, /* icache_sync_range */ + + armv7_dcache_wbinv_all, /* dcache_wbinv_all */ + armv7_dcache_wbinv_range, /* dcache_wbinv_range */ + armv7_dcache_inv_range, /* dcache_inv_range */ + armv7_dcache_wb_range, /* dcache_wb_range */ + + armv7_idcache_inv_all, /* idcache_inv_all */ + armv7_idcache_wbinv_all_smp, /* idcache_wbinv_all */ + armv7_idcache_wbinv_range, /* idcache_wbinv_range */ + + /* + * Note: For CPUs using the PL310 the L2 ops are filled in when the + * L2 cache controller is actually enabled. 
+ */ + cpufunc_nullop, /* l2cache_wbinv_all */ + (void *)cpufunc_nullop, /* l2cache_wbinv_range */ + (void *)cpufunc_nullop, /* l2cache_inv_range */ + (void *)cpufunc_nullop, /* l2cache_wb_range */ + (void *)cpufunc_nullop, /* l2cache_drain_writebuf */ + /* Other functions */ + + armv7_drain_writebuf, /* drain_writebuf */ + + armv7_cpu_sleep, /* sleep */ + + /* Soft functions */ + + armv7_context_switch_smp, /* context_switch */ + + cortexa_setup /* cpu setup */ +}; +#endif #endif /* CPU_CORTEXA */ /* @@ -762,14 +821,21 @@ #endif /* CPU_ARM1176 */ #if defined(CPU_CORTEXA) || defined(CPU_KRAIT) switch(cputype & CPU_ID_SCHEME_MASK) { + case CPU_ID_CORTEXA8: + cpufuncs = cortexa_up_cpufuncs; + get_cachetype_cp15(); + goto out; case CPU_ID_CORTEXA5: case CPU_ID_CORTEXA7: - case CPU_ID_CORTEXA8: case CPU_ID_CORTEXA9: case CPU_ID_CORTEXA12: case CPU_ID_CORTEXA15: case CPU_ID_KRAIT300: - cpufuncs = cortexa_cpufuncs; +#ifdef SMP + cpufuncs = cortexa_smp_cpufuncs; +#else + cpufuncs = cortexa_up_cpufuncs; +#endif get_cachetype_cp15(); goto out; default: Index: sys/arm/arm/cpufunc_asm_armv7.S =================================================================== --- sys/arm/arm/cpufunc_asm_armv7.S +++ sys/arm/arm/cpufunc_asm_armv7.S @@ -37,20 +37,6 @@ .cpu cortex-a8 -.Lcoherency_level: - .word _C_LABEL(arm_cache_loc) -.Lcache_type: - .word _C_LABEL(arm_cache_type) -.Larmv7_dcache_line_size: - .word _C_LABEL(arm_dcache_min_line_size) -.Larmv7_icache_line_size: - .word _C_LABEL(arm_icache_min_line_size) -.Larmv7_idcache_line_size: - .word _C_LABEL(arm_idcache_min_line_size) -.Lway_mask: - .word 0x3ff -.Lmax_index: - .word 0x7fff .Lpage_mask: .word 0xfff @@ -65,18 +51,26 @@ #define PT_OUTER_WB (3 << 3) #define PT_OUTER_WBWA (1 << 3) -#ifdef SMP +#ifndef CPU_SMP +#error Don't build cpufunc_asm_armv7.S directly +#endif + +#if CPU_SMP > 0 #define PT_ATTR (PT_S|PT_INNER_WBWA|PT_OUTER_WBWA|PT_NOS) +#define FUNC_ENTRY(x) ENTRY(x ## _smp) +#define FUNC_END(x) END(x ## _smp) #else #define 
PT_ATTR (PT_INNER_WBWA|PT_OUTER_WBWA) +#define FUNC_ENTRY(x) ENTRY(x ## _up) +#define FUNC_END(x) END(x ## _up) #endif -ENTRY(armv7_setttb) +FUNC_ENTRY(armv7_setttb) dsb orr r0, r0, #PT_ATTR mcr CP15_TTBR0(r0) isb -#ifdef SMP +#if CPU_SMP > 0 mcr CP15_TLBIALLIS #else mcr CP15_TLBIALL @@ -84,11 +78,11 @@ dsb isb RET -END(armv7_setttb) +FUNC_END(armv7_setttb) -ENTRY(armv7_tlb_flushID) +FUNC_ENTRY(armv7_tlb_flushID) dsb -#ifdef SMP +#if CPU_SMP > 0 mcr CP15_TLBIALLIS mcr CP15_BPIALLIS #else @@ -98,12 +92,12 @@ dsb isb mov pc, lr -END(armv7_tlb_flushID) +FUNC_END(armv7_tlb_flushID) -ENTRY(armv7_tlb_flushID_SE) +FUNC_ENTRY(armv7_tlb_flushID_SE) ldr r1, .Lpage_mask bic r0, r0, r1 -#ifdef SMP +#if CPU_SMP > 0 mcr CP15_TLBIMVAAIS(r0) mcr CP15_BPIALLIS #else @@ -113,65 +107,12 @@ dsb isb mov pc, lr -END(armv7_tlb_flushID_SE) - -/* Based on algorithm from ARM Architecture Reference Manual */ -ENTRY(armv7_dcache_wbinv_all) - stmdb sp!, {r4, r5, r6, r7, r8, r9} - - /* Get cache level */ - ldr r0, .Lcoherency_level - ldr r3, [r0] - cmp r3, #0 - beq Finished - /* For each cache level */ - mov r8, #0 -Loop1: - /* Get cache type for given level */ - mov r2, r8, lsl #2 - add r2, r2, r2 - ldr r0, .Lcache_type - ldr r1, [r0, r2] - - /* Get line size */ - and r2, r1, #7 - add r2, r2, #4 - - /* Get number of ways */ - ldr r4, .Lway_mask - ands r4, r4, r1, lsr #3 - clz r5, r4 - - /* Get max index */ - ldr r7, .Lmax_index - ands r7, r7, r1, lsr #13 -Loop2: - mov r9, r4 -Loop3: - mov r6, r8, lsl #1 - orr r6, r6, r9, lsl r5 - orr r6, r6, r7, lsl r2 - - /* Clean and invalidate data cache by way/index */ - mcr CP15_DCCISW(r6) - subs r9, r9, #1 - bge Loop3 - subs r7, r7, #1 - bge Loop2 -Skip: - add r8, r8, #1 - cmp r3, r8 - bne Loop1 -Finished: - dsb - ldmia sp!, {r4, r5, r6, r7, r8, r9} - RET -END(armv7_dcache_wbinv_all) +FUNC_END(armv7_tlb_flushID_SE) -ENTRY(armv7_idcache_wbinv_all) +FUNC_ENTRY(armv7_idcache_wbinv_all) stmdb sp!, {lr} bl armv7_dcache_wbinv_all -#ifdef SMP +#if CPU_SMP > 0 
mcr CP15_ICIALLUIS #else mcr CP15_ICIALLU @@ -180,109 +121,15 @@ isb ldmia sp!, {lr} RET -END(armv7_idcache_wbinv_all) - -ENTRY(armv7_dcache_wb_range) - ldr ip, .Larmv7_dcache_line_size - ldr ip, [ip] - sub r3, ip, #1 - and r2, r0, r3 - add r1, r1, r2 - bic r0, r0, r3 -.Larmv7_wb_next: - mcr CP15_DCCMVAC(r0) - add r0, r0, ip - subs r1, r1, ip - bhi .Larmv7_wb_next - dsb /* data synchronization barrier */ - RET -END(armv7_dcache_wb_range) - -ENTRY(armv7_dcache_wbinv_range) - ldr ip, .Larmv7_dcache_line_size - ldr ip, [ip] - sub r3, ip, #1 - and r2, r0, r3 - add r1, r1, r2 - bic r0, r0, r3 -.Larmv7_wbinv_next: - mcr CP15_DCCIMVAC(r0) - add r0, r0, ip - subs r1, r1, ip - bhi .Larmv7_wbinv_next - dsb /* data synchronization barrier */ - RET -END(armv7_dcache_wbinv_range) - -/* - * Note, we must not invalidate everything. If the range is too big we - * must use wb-inv of the entire cache. - */ -ENTRY(armv7_dcache_inv_range) - ldr ip, .Larmv7_dcache_line_size - ldr ip, [ip] - sub r3, ip, #1 - and r2, r0, r3 - add r1, r1, r2 - bic r0, r0, r3 -.Larmv7_inv_next: - mcr CP15_DCIMVAC(r0) - add r0, r0, ip - subs r1, r1, ip - bhi .Larmv7_inv_next - dsb /* data synchronization barrier */ - RET -END(armv7_dcache_inv_range) - -ENTRY(armv7_idcache_wbinv_range) - ldr ip, .Larmv7_idcache_line_size - ldr ip, [ip] - sub r3, ip, #1 - and r2, r0, r3 - add r1, r1, r2 - bic r0, r0, r3 -.Larmv7_id_wbinv_next: - mcr CP15_ICIMVAU(r0) - mcr CP15_DCCIMVAC(r0) - add r0, r0, ip - subs r1, r1, ip - bhi .Larmv7_id_wbinv_next - dsb /* data synchronization barrier */ - isb /* instruction synchronization barrier */ - RET -END(armv7_idcache_wbinv_range) - +FUNC_END(armv7_idcache_wbinv_all) -ENTRY_NP(armv7_icache_sync_range) - ldr ip, .Larmv7_icache_line_size - ldr ip, [ip] - sub r3, ip, #1 /* Address need not be aligned, but */ - and r2, r0, r3 /* round length up if op spans line */ - add r1, r1, r2 /* boundary: len += addr & linemask; */ -.Larmv7_sync_next: - mcr CP15_DCCMVAC(r0) - mcr CP15_ICIMVAU(r0) 
- add r0, r0, ip - subs r1, r1, ip - bhi .Larmv7_sync_next - dsb /* data synchronization barrier */ - isb /* instruction synchronization barrier */ - RET -END(armv7_icache_sync_range) - -ENTRY(armv7_cpu_sleep) - dsb /* data synchronization barrier */ - wfi /* wait for interrupt */ - RET -END(armv7_cpu_sleep) - -ENTRY(armv7_context_switch) +FUNC_ENTRY(armv7_context_switch) dsb orr r0, r0, #PT_ATTR mcr CP15_TTBR0(r0) isb -#ifdef SMP +#if CPU_SMP > 0 mcr CP15_TLBIALLIS #else mcr CP15_TLBIALL @@ -290,69 +137,5 @@ dsb isb RET -END(armv7_context_switch) - -ENTRY(armv7_drain_writebuf) - dsb - RET -END(armv7_drain_writebuf) - -ENTRY(armv7_sev) - dsb - sev - nop - RET -END(armv7_sev) - -ENTRY(armv7_auxctrl) - mrc CP15_ACTLR(r2) - bic r3, r2, r0 /* Clear bits */ - eor r3, r3, r1 /* XOR bits */ - - teq r2, r3 - mcrne CP15_ACTLR(r3) - mov r0, r2 - RET -END(armv7_auxctrl) - -/* - * Invalidate all I+D+branch cache. Used by startup code, which counts - * on the fact that only r0-r3,ip are modified and no stack space is used. - */ -ENTRY(armv7_idcache_inv_all) - mov r0, #0 - mcr CP15_CSSELR(r0) @ set cache level to L1 - mrc CP15_CCSIDR(r0) - - ubfx r2, r0, #13, #15 @ get num sets - 1 from CCSIDR - ubfx r3, r0, #3, #10 @ get numways - 1 from CCSIDR - clz r1, r3 @ number of bits to MSB of way - lsl r3, r3, r1 @ shift into position - mov ip, #1 @ - lsl ip, ip, r1 @ ip now contains the way decr - - ubfx r0, r0, #0, #3 @ get linesize from CCSIDR - add r0, r0, #4 @ apply bias - lsl r2, r2, r0 @ shift sets by log2(linesize) - add r3, r3, r2 @ merge numsets - 1 with numways - 1 - sub ip, ip, r2 @ subtract numsets - 1 from way decr - mov r1, #1 - lsl r1, r1, r0 @ r1 now contains the set decr - mov r2, ip @ r2 now contains set way decr - - /* r3 = ways/sets, r2 = way decr, r1 = set decr, r0 and ip are free */ -1: mcr CP15_DCISW(r3) @ invalidate line - movs r0, r3 @ get current way/set - beq 2f @ at 0 means we are done. 
- movs r0, r0, lsl #10 @ clear way bits leaving only set bits - subne r3, r3, r1 @ non-zero?, decrement set # - subeq r3, r3, r2 @ zero?, decrement way # and restore set count - b 1b - -2: dsb @ wait for stores to finish - mov r0, #0 @ and ... - mcr CP15_ICIALLU @ invalidate instruction+branch cache - isb @ instruction sync barrier - bx lr @ return -END(armv7_idcache_inv_all) +FUNC_END(armv7_context_switch) Index: sys/arm/arm/cpufunc_asm_armv7_common.S =================================================================== --- sys/arm/arm/cpufunc_asm_armv7_common.S +++ sys/arm/arm/cpufunc_asm_armv7_common.S @@ -51,69 +51,6 @@ .word 0x3ff .Lmax_index: .word 0x7fff -.Lpage_mask: - .word 0xfff - -#define PT_NOS (1 << 5) -#define PT_S (1 << 1) -#define PT_INNER_NC 0 -#define PT_INNER_WT (1 << 0) -#define PT_INNER_WB ((1 << 0) | (1 << 6)) -#define PT_INNER_WBWA (1 << 6) -#define PT_OUTER_NC 0 -#define PT_OUTER_WT (2 << 3) -#define PT_OUTER_WB (3 << 3) -#define PT_OUTER_WBWA (1 << 3) - -#ifdef SMP -#define PT_ATTR (PT_S|PT_INNER_WBWA|PT_OUTER_WBWA|PT_NOS) -#else -#define PT_ATTR (PT_INNER_WBWA|PT_OUTER_WBWA) -#endif - -ENTRY(armv7_setttb) - dsb - orr r0, r0, #PT_ATTR - mcr CP15_TTBR0(r0) - isb -#ifdef SMP - mcr CP15_TLBIALLIS -#else - mcr CP15_TLBIALL -#endif - dsb - isb - RET -END(armv7_setttb) - -ENTRY(armv7_tlb_flushID) - dsb -#ifdef SMP - mcr CP15_TLBIALLIS - mcr CP15_BPIALLIS -#else - mcr CP15_TLBIALL - mcr CP15_BPIALL -#endif - dsb - isb - mov pc, lr -END(armv7_tlb_flushID) - -ENTRY(armv7_tlb_flushID_SE) - ldr r1, .Lpage_mask - bic r0, r0, r1 -#ifdef SMP - mcr CP15_TLBIMVAAIS(r0) - mcr CP15_BPIALLIS -#else - mcr CP15_TLBIMVA(r0) - mcr CP15_BPIALL -#endif - dsb - isb - mov pc, lr -END(armv7_tlb_flushID_SE) /* Based on algorithm from ARM Architecture Reference Manual */ ENTRY(armv7_dcache_wbinv_all) @@ -168,20 +105,6 @@ RET END(armv7_dcache_wbinv_all) -ENTRY(armv7_idcache_wbinv_all) - stmdb sp!, {lr} - bl armv7_dcache_wbinv_all -#ifdef SMP - mcr CP15_ICIALLUIS -#else - 
mcr CP15_ICIALLU -#endif - dsb - isb - ldmia sp!, {lr} - RET -END(armv7_idcache_wbinv_all) - ENTRY(armv7_dcache_wb_range) ldr ip, .Larmv7_dcache_line_size ldr ip, [ip] @@ -276,22 +199,6 @@ RET END(armv7_cpu_sleep) -ENTRY(armv7_context_switch) - dsb - orr r0, r0, #PT_ATTR - - mcr CP15_TTBR0(r0) - isb -#ifdef SMP - mcr CP15_TLBIALLIS -#else - mcr CP15_TLBIALL -#endif - dsb - isb - RET -END(armv7_context_switch) - ENTRY(armv7_drain_writebuf) dsb RET Index: sys/arm/arm/cpufunc_asm_armv7_smp.S =================================================================== --- /dev/null +++ sys/arm/arm/cpufunc_asm_armv7_smp.S @@ -0,0 +1,9 @@ +/* $FreeBSD$ */ + +#ifdef SMP + +/* Build for SMP */ +#define CPU_SMP 1 +#include "cpufunc_asm_armv7.S" + +#endif Index: sys/arm/arm/cpufunc_asm_armv7_up.S =================================================================== --- /dev/null +++ sys/arm/arm/cpufunc_asm_armv7_up.S @@ -0,0 +1,5 @@ +/* $FreeBSD$ */ + +/* Build for UP */ +#define CPU_SMP 0 +#include "cpufunc_asm_armv7.S" Index: sys/arm/include/cpufunc.h =================================================================== --- sys/arm/include/cpufunc.h +++ sys/arm/include/cpufunc.h @@ -274,20 +274,25 @@ void armv6_idcache_wbinv_all (void); #endif #if defined(CPU_MV_PJ4B) || defined(CPU_CORTEXA) || defined(CPU_KRAIT) -void armv7_setttb (u_int); -void armv7_tlb_flushID (void); -void armv7_tlb_flushID_SE (u_int); +void armv7_setttb_up (u_int); +void armv7_setttb_smp (u_int); +void armv7_tlb_flushID_up (void); +void armv7_tlb_flushID_smp (void); +void armv7_tlb_flushID_SE_up (u_int); +void armv7_tlb_flushID_SE_smp (u_int); void armv7_icache_sync_range (vm_offset_t, vm_size_t); void armv7_idcache_wbinv_range (vm_offset_t, vm_size_t); void armv7_idcache_inv_all (void); void armv7_dcache_wbinv_all (void); -void armv7_idcache_wbinv_all (void); +void armv7_idcache_wbinv_all_up (void); +void armv7_idcache_wbinv_all_smp (void); void armv7_dcache_wbinv_range (vm_offset_t, vm_size_t); void 
armv7_dcache_inv_range (vm_offset_t, vm_size_t); void armv7_dcache_wb_range (vm_offset_t, vm_size_t); void armv7_cpu_sleep (int); void armv7_setup (void); -void armv7_context_switch (void); +void armv7_context_switch_up (void); +void armv7_context_switch_smp (void); void armv7_drain_writebuf (void); void armv7_sev (void); u_int armv7_auxctrl (u_int, u_int); Index: sys/conf/Makefile.arm =================================================================== --- sys/conf/Makefile.arm +++ sys/conf/Makefile.arm @@ -72,7 +72,8 @@ $S/$M/$M/cpufunc_asm_xscale_c3.S $S/$M/$M/cpufunc_asm_armv5_ec.S \ $S/$M/$M/cpufunc_asm_fa526.S $S/$M/$M/cpufunc_asm_sheeva.S \ $S/$M/$M/cpufunc_asm_pj4b.S $S/$M/$M/cpufunc_asm_armv6.S \ - $S/$M/$M/cpufunc_asm_armv7.S + $S/$M/$M/cpufunc_asm_armv7_up.S $S/$M/$M/cpufunc_asm_armv7_smp.S \ + $S/$M/$M/cpufunc_asm_armv7_common.S .if defined(KERNPHYSADDR) KERNEL_EXTRA=trampoline Index: sys/conf/files.arm =================================================================== --- sys/conf/files.arm +++ sys/conf/files.arm @@ -32,7 +32,9 @@ arm/arm/cpufunc_asm_armv4.S optional cpu_arm9 | cpu_arm9e | cpu_fa526 | cpu_xscale_pxa2x0 | cpu_xscale_ixp425 | cpu_xscale_81342 arm/arm/cpufunc_asm_armv5_ec.S optional cpu_arm9e arm/arm/cpufunc_asm_armv6.S optional cpu_arm1176 -arm/arm/cpufunc_asm_armv7.S optional cpu_cortexa | cpu_krait | cpu_mv_pj4b +arm/arm/cpufunc_asm_armv7_common.S optional cpu_cortexa | cpu_krait | cpu_mv_pj4b +arm/arm/cpufunc_asm_armv7_up.S optional cpu_cortexa | cpu_krait | cpu_mv_pj4b +arm/arm/cpufunc_asm_armv7_smp.S optional cpu_cortexa smp | cpu_krait smp | cpu_mv_pj4b smp arm/arm/cpufunc_asm_fa526.S optional cpu_fa526 arm/arm/cpufunc_asm_pj4b.S optional cpu_mv_pj4b arm/arm/cpufunc_asm_sheeva.S optional cpu_arm9e