Index: sys/arm64/arm64/bzero.S
===================================================================
--- /dev/null
+++ sys/arm64/arm64/bzero.S
@@ -0,0 +1,197 @@
+/*-
+ * Copyright (C) 2016 Cavium Inc.
+ * All rights reserved.
+ *
+ * Developed by Semihalf.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+__FBSDID("$FreeBSD$");
+
+#include "assym.s"
+
+	/*
+	 * void bzero(void *p, size_t size)
+	 *
+	 *  x0 - p
+	 *  x1 - size
+	 */
+ENTRY(bzero)
+	cbz	x1, ending
+
+	/*
+	 * x5 is the number of cache lines to zero (calculated later).
+	 * It must stay 0 unless zeroing by cache lines is possible.
+	 */
+	mov	x5, xzr
+
+	/* Do not use cache-assisted zeroing for buffers with size <= 16 */
+	cmp	x1, #0x10
+	b.le	normal
+
+	/*
+	 * Load the size of the line that dc zva zeroes.
+	 * 0 means the instruction is not allowed.
+	 */
+	ldr	x7, =dczva_line_size
+	ldr	x7, [x7]
+	cbz	x7, normal
+
+	/*
+	 * The buffer must be larger than a cache line to use cache zeroing
+	 * (and cache line aligned, but that is checked after the jump).
+	 */
+	cmp	x1, x7
+	b.lt	normal
+
+	/*
+	 * Calculate the number of bytes to the cache aligned address (x4)
+	 * and the number of full cache lines (x5). x6 is the final address
+	 * to zero.
+	 */
+	sub	x2, x7, #0x01
+	mov	x3, -1
+	eor	x3, x3, x2
+	add	x4, x0, x2
+	and	x4, x4, x3
+	subs	x4, x4, x0
+	b.eq	normal
+
+	/* Calculate the number of "lines" in the buffer */
+	sub	x5, x1, x4
+	rbit	x2, x7
+	clz	x2, x2
+	lsr	x5, x5, x2
+
+	/*
+	 * If the number of cache lines is 0, we will not be able to zero
+	 * by cache lines, so go the normal way.
+	 */
+	cbz	x5, normal
+	/* x6 is the final address to zero */
+	add	x6, x0, x1
+
+	/*
+	 * We are here because x5 is non-0, so normal will be used to align
+	 * the buffer before cache zeroing. x4 holds the number of bytes
+	 * needed for alignment.
+	 */
+	mov	x1, x4
+
+	/* When jumping here: x0 holds the pointer, x1 holds the size */
+normal:
+	/*
+	 * Get the buffer offset from a 16 byte aligned address; 0 means
+	 * the pointer is already aligned.
+	 */
+	ands	x2, x0, #0x0f
+	b.eq	aligned_to_16
+	/* Calculate how many one-byte stores reach an 8 byte aligned address. */
+	ands	x2, x2, #0x07
+	mov	x3, #0x08
+	sub	x2, x3, x2
+	/* x2 is the number of bytes missing for alignment, x1 is the buffer size */
+	cmp	x1, x2
+	csel	x2, x1, x2, le
+	sub	x1, x1, x2
+
+	/*
+	 * The byte-by-byte loop zeroes at least enough bytes to align the
+	 * pointer and at most "size" bytes.
+	 */
+align:
+	strb	wzr, [x0], #0x01
+	subs	x2, x2, #0x01
+	b.ne	align
+
+	/* Now the pointer is aligned to 8 bytes */
+	cmp	x1, #0x10
+	b.lt	lead_out
+	/*
+	 * Check whether another 8 byte store is needed to align to a
+	 * 16 byte address, and do it.
+	 */
+	tbz	x0, #0x03, aligned_to_16
+	str	xzr, [x0], #0x08
+	sub	x1, x1, #0x08
+
+	/* When jumping here: x0 is a 16 byte aligned address, x1 is the size */
+aligned_to_16:
+	/* If the size is less than 16 bytes, use lead_out to zero what remains */
+	cmp	x1, #0x10
+	b.lt	lead_out
+
+	lsr	x2, x1, #0x04
+zero_by_16:
+	stp	xzr, xzr, [x0], #0x10
+	subs	x2, x2, #0x01
+	b.ne	zero_by_16
+
+	/*
+	 * The lead out requires the address to be aligned to 8 bytes. It is
+	 * used to zero buffers with sizes < 16 and whatever cannot be zeroed
+	 * by the zero_by_16 loop.
+	 */
+	ands	x1, x1, #0x0f
+	b.eq	lead_out_end
+lead_out:
+	tbz	x1, #0x03, lead_out_dword
+	str	xzr, [x0], #0x08
+lead_out_dword:
+	tbz	x1, #0x02, lead_out_word
+	str	wzr, [x0], #0x04
+lead_out_word:
+	tbz	x1, #0x01, lead_out_byte
+	strh	wzr, [x0], #0x02
+lead_out_byte:
+	tbz	x1, #0x00, lead_out_end
+	strb	wzr, [x0], #0x01
+
+lead_out_end:
+	/*
+	 * If x5 is non-zero, normal has been used as a lead-in to align the
+	 * buffer address to the cache line size.
+	 */
+	cbz	x5, ending
+
+	/*
+	 * Here x5 holds the number of lines to zero; x6 is the final address
+	 * of the buffer. x0 is a cache line aligned pointer and x7 is the
+	 * cache line size in bytes.
+	 */
+cache_line_zero:
+	dc	zva, x0
+	add	x0, x0, x7
+	subs	x5, x5, #0x01
+	b.ne	cache_line_zero
+
+	/* Need to zero remaining bytes? */
+	subs	x1, x6, x0
+	b.ne	normal
+
+ending:
+	ret
+
+END(bzero)
Index: sys/arm64/arm64/machdep.c
===================================================================
--- sys/arm64/arm64/machdep.c
+++ sys/arm64/arm64/machdep.c
@@ -108,6 +108,7 @@
 int64_t dcache_line_size;	/* The minimum D cache line size */
 int64_t icache_line_size;	/* The minimum I cache line size */
 int64_t idcache_line_size;	/* The minimum cache line size */
+int64_t dczva_line_size;	/* The size of cache line the dc zva zeroes */
 
 static void
 cpu_startup(void *dummy)
@@ -129,16 +130,6 @@
 	return (0);
 }
 
-void
-bzero(void *buf, size_t len)
-{
-	uint8_t *p;
-
-	p = buf;
-	while(len-- > 0)
-		*p++ = 0;
-}
-
 int
 fill_regs(struct thread *td, struct reg *regs)
 {
@@ -800,8 +791,9 @@
 static void
 cache_setup(void)
 {
-	int dcache_line_shift, icache_line_shift;
+	int dcache_line_shift, icache_line_shift, dczva_line_shift;
 	uint32_t ctr_el0;
+	uint32_t dczid_el0;
 
 	ctr_el0 = READ_SPECIALREG(ctr_el0);
 
@@ -815,6 +807,17 @@
 	icache_line_size = sizeof(int) << icache_line_shift;
 
 	idcache_line_size = MIN(dcache_line_size, icache_line_size);
+
+	dczid_el0 = READ_SPECIALREG(dczid_el0);
+
+	/* Check whether dc zva is prohibited */
+	if (dczid_el0 & DCZID_DZP)
+		dczva_line_size = 0;
+	else {
+		/* Same shift-based calculation as for the cache line sizes above */
+		dczva_line_shift = DCZID_BS_SIZE(dczid_el0);
+		dczva_line_size = sizeof(int) << dczva_line_shift;
+	}
 }
 
 void
Index: sys/arm64/include/armreg.h
===================================================================
--- sys/arm64/include/armreg.h
+++ sys/arm64/include/armreg.h
@@ -66,6 +66,12 @@
 #define	CTR_ILINE_MASK		(0xf << CTR_ILINE_SHIFT)
 #define	CTR_ILINE_SIZE(reg)	(((reg) & CTR_ILINE_MASK) >> CTR_ILINE_SHIFT)
 
+/* DCZID_EL0 - Data Cache Zero ID register */
+#define	DCZID_DZP		(1 << 4) /* DC ZVA prohibited if non-0 */
+#define	DCZID_BS_SHIFT		0
+#define	DCZID_BS_MASK		(0xf << DCZID_BS_SHIFT)
+#define	DCZID_BS_SIZE(reg)	(((reg) & DCZID_BS_MASK) >> DCZID_BS_SHIFT)
+
 /* ESR_ELx */
 #define	ESR_ELx_ISS_MASK	0x00ffffff
 #define	ISS_INSN_FnV		(0x01 << 10)
Index: sys/conf/files.arm64
===================================================================
--- sys/conf/files.arm64
+++ sys/conf/files.arm64
@@ -12,6 +12,7 @@
 arm64/arm64/bus_space_asm.S		standard
 arm64/arm64/busdma_bounce.c		standard
 arm64/arm64/busdma_machdep.c		standard
+arm64/arm64/bzero.S			standard
 arm64/arm64/clock.c			standard
 arm64/arm64/copyinout.S		standard
 arm64/arm64/copystr.c			standard
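
Note for reviewers: the sketch below is a rough C model of the control flow the new bzero.S implements: zero byte by byte until the pointer is aligned, zero whole cache lines with dc zva when dczva_line_size is non-zero and the buffer is large enough, then finish the tail with small stores. It is only an illustration, not part of the patch: bzero_model(), zero_small() and dc_zva() are made-up names, and the 16 byte stp fast path is folded into zero_small() for brevity.

/* Reference model only -- not part of the patch. */
#include <stdint.h>
#include <stddef.h>

extern int64_t dczva_line_size;	/* set by cache_setup(); 0 if DC ZVA is prohibited */

/* Stand-in for the "dc zva, Xt" instruction. */
static inline void
dc_zva(void *p)
{
	__asm __volatile("dc zva, %0" : : "r" (p) : "memory");
}

/* Models the small-store "normal" and lead_out paths. */
static void
zero_small(uint8_t *p, size_t len)
{
	while (len-- > 0)
		*p++ = 0;
}

void
bzero_model(void *buf, size_t size)
{
	uint8_t *p = buf;
	size_t line = (size_t)dczva_line_size;
	size_t head, lines;

	if (size == 0)
		return;

	if (line != 0 && size > 16 && size >= line) {
		/* Bytes up to the next cache line boundary (x4 in the asm). */
		head = (line - ((uintptr_t)p & (line - 1))) & (line - 1);
		/* Whole cache lines that dc zva can zero (x5 in the asm). */
		lines = (size - head) / line;
		if (lines != 0) {
			zero_small(p, head);		/* lead-in alignment */
			p += head;
			size -= head + lines * line;
			while (lines-- > 0) {		/* cache_line_zero loop */
				dc_zva(p);
				p += line;
			}
		}
	}
	zero_small(p, size);				/* lead_out for the tail */
}

DCZID_EL0.BS holds log2 of the block size in 4 byte words, so cache_setup() computing dczva_line_size as sizeof(int) << DCZID_BS_SIZE(dczid_el0) gives the block size in bytes; BS = 4, for example, means 64 byte blocks.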