Index: sys/arm64/arm64/bzero.S
===================================================================
--- /dev/null
+++ sys/arm64/arm64/bzero.S
@@ -0,0 +1,176 @@
+/*-
+ * Copyright (C) 2016 Cavium Inc.
+ * All rights reserved.
+ *
+ * Developed by Semihalf.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+__FBSDID("$FreeBSD$");
+
+#include "assym.s"
+
+	/*
+	 * void bzero(void *p, size_t size)
+	 *
+	 *  x0 - p
+	 *  x1 - size
+	 */
+ENTRY(bzero)
+	cbz	x0, ending
+	cbz	x1, ending
+
+	/* x5 is the number of cache lines to zero (calculated later).  It must
+	 * be kept 0 unless zeroing by cache lines is possible. */
+	mov	x5, xzr
+
+	/* If cache zeroing may not be used, go the normal way */
+	mrs	x2, DCZID_EL0
+	tbnz	x2, #0x04, normal
+
+	/* Get the cache line size (in bytes) into x7.  Do not overwrite it below! */
+	and	x2, x2, 0x0f
+	mov	x3, #4
+	lsl	x7, x3, x2
+
+	/* The buffer must be larger than a cache line to use cache zeroing
+	 * (and cache line aligned, but this is checked after the jump). */
+	cmp	x1, x7
+	b.lt	normal
+
+	/* Calculate the number of bytes to the cache-aligned address (x4),
+	 * the number of full cache lines (x5) and the number of bytes that
+	 * complete the buffer (x6). */
+	sub	x2, x7, #0x01
+	mov	x3, -1
+	eor	x3, x3, x2
+	add	x4, x0, x2
+	and	x4, x4, x3
+	sub	x4, x4, x0
+
+	sub	x5, x1, x4
+	udiv	x5, x5, x7
+
+	/* If the number of cache lines is 0, we will not be able to zero
+	 * by cache lines, so go the normal way. */
+	cbz	x5, normal
+
+	/* Calculate the bytes left to complete the buffer */
+	mul	x6, x5, x7
+	sub	x6, x1, x6
+	sub	x6, x6, x4
+
+	/* We are here because x5 is non-zero, so normal will be used as a
+	 * lead-in to align the buffer before cache zeroing.  x4 holds the
+	 * number of bytes needed for alignment. */
+	mov	x1, x4
+
+	/* When jumping here: x0 holds the pointer, x1 holds the size */
+normal:
+	/* Get the buffer offset from a 16-byte aligned address; 0 means the
+	 * pointer is already aligned. */
+	ands	x2, x0, #0x0f
+	b.eq	aligned_to_16
+	/* Calculate the number of one-byte loop runs to an 8-byte aligned address. */
+	ands	x2, x2, #0x07
+	mov	x3, #0x08
+	sub	x2, x3, x2
+	/* x2 is the number of bytes missing for alignment, x1 is the buffer size */
+	cmp	x1, x2
+	csel	x2, x1, x2, le
+	sub	x1, x1, x2
+
+	/*
+	 * The byte-by-byte loop zeroes at least enough bytes to align the
+	 * pointer and at most "size".
+	 */
+align:
+	strb	wzr, [x0], #0x01
+	subs	x2, x2, #0x01
+	b.ne	align
+
+	/* The pointer is now aligned to 8 bytes */
+	cmp	x1, #0x10
+	b.lt	lead_out
+	/* Check if another 8-byte store is needed to align to a 16-byte
+	 * address and do it. */
+	tbz	x0, #0x03, aligned_to_16
+	str	xzr, [x0], #0x08
+	sub	x1, x1, #0x08
+
+	/* When jumping here: x0 is a 16-byte aligned address, x1 is the size */
+aligned_to_16:
+	/* If the size is less than 16 bytes, use lead_out to zero what remains */
+	cmp	x1, #0x10
+	b.lt	lead_out
+
+	lsr	x2, x1, #0x04
+zero_by_16:
+	stp	xzr, xzr, [x0], #0x10
+	subs	x2, x2, #0x01
+	b.ne	zero_by_16
+
+	/*
+	 * The lead out requires the address to be aligned to 8 bytes.  It is
+	 * used to zero buffers with sizes < 16 and whatever cannot be zeroed
+	 * by the zero_by_16 loop. */
+	ands	x1, x1, #0x0f
+	b.eq	lead_out_end
+lead_out:
+	tbz	x1, #0x03, lead_out_dword
+	str	xzr, [x0], #0x08
+lead_out_dword:
+	tbz	x1, #0x02, lead_out_word
+	str	wzr, [x0], #0x04
+lead_out_word:
+	tbz	x1, #0x01, lead_out_byte
+	strh	wzr, [x0], #0x02
+lead_out_byte:
+	tbz	x1, #0x00, lead_out_end
+	strb	wzr, [x0], #0x01
+
+lead_out_end:
+	/* If x5 is non-zero, normal has been used as a lead-in to align the
+	 * buffer address to the cache line size. */
+	cbz	x5, ending
+
+	/* Here x5 holds the number of cache lines to zero; x6 the number of
+	 * bytes after those lines that complete the buffer.  x0 is a cache
+	 * line aligned pointer.  x7 is the cache line size in bytes. */
cache_line_zero:
+	dc	zva, x0
+	add	x0, x0, x7
+	sub	x1, x1, x7
+	subs	x5, x5, #0x01
+	b.ne	cache_line_zero
+
+	/* If x6 is non-zero, this is what is left to complete the zeroing */
+	mov	x1, x6
+	b	normal
+
+ending:
+	ret
+
+END(bzero)
Index: sys/arm64/arm64/machdep.c
===================================================================
--- sys/arm64/arm64/machdep.c
+++ sys/arm64/arm64/machdep.c
@@ -129,16 +129,6 @@
 	return (0);
 }
 
-void
-bzero(void *buf, size_t len)
-{
-	uint8_t *p;
-
-	p = buf;
-	while(len-- > 0)
-		*p++ = 0;
-}
-
 int
 fill_regs(struct thread *td, struct reg *regs)
 {
Index: sys/conf/files.arm64
===================================================================
--- sys/conf/files.arm64
+++ sys/conf/files.arm64
@@ -35,6 +35,7 @@
 arm64/arm64/in_cksum.c		optional	inet | inet6
 arm64/arm64/locore.S		standard	no-obj
 arm64/arm64/machdep.c		standard
+arm64/arm64/bzero.S		standard
 arm64/arm64/mem.c		standard
 arm64/arm64/minidump_machdep.c	standard
 arm64/arm64/mp_machdep.c	optional	smp
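
For reference, below is a minimal C sketch (not part of the patch) of the two calculations the assembly performs before the "dc zva" path: decoding DCZID_EL0 into the zeroing block size (bit 4, DZP, prohibits "dc zva"; bits [3:0], BS, encode log2 of the block size in 4-byte words, matching the and/lsl sequence above), and splitting the request into a lead-in, whole blocks and a tail, as held in x4/x5/x6. The helper names dc_zva_block_size() and split_request() are hypothetical and only illustrate the arithmetic.

#include <stddef.h>
#include <stdint.h>

/*
 * Hypothetical helper: decode DCZID_EL0.  Returns 0 if "dc zva" is
 * prohibited (DZP set), otherwise the block size in bytes, i.e. 4 << BS.
 */
static size_t
dc_zva_block_size(uint64_t dczid)
{

	if (dczid & (1UL << 4))
		return (0);
	return ((size_t)4 << (dczid & 0xf));
}

/*
 * Hypothetical helper: split a request into the bytes needed to reach a
 * block-aligned address (x4 in the assembly), the number of whole blocks
 * (x5) and the remaining tail (x6).  Assumes len >= bs, which the assembly
 * checks (cmp x1, x7; b.lt normal) before taking this path.
 */
static void
split_request(uintptr_t p, size_t len, size_t bs,
    size_t *lead, size_t *blocks, size_t *tail)
{
	uintptr_t aligned = (p + bs - 1) & ~(uintptr_t)(bs - 1);

	*lead = (size_t)(aligned - p);
	*blocks = (len - *lead) / bs;
	*tail = len - *lead - *blocks * bs;
}

The lead-in and tail are handled by the "normal" store loops; only the *blocks full blocks are zeroed with "dc zva".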