Index: sys/arm64/arm64/copyinout.S
===================================================================
--- sys/arm64/arm64/copyinout.S
+++ sys/arm64/arm64/copyinout.S
@@ -51,24 +51,17 @@
  * int copyout(const void *kaddr, void *udaddr, size_t len)
  */
 ENTRY(copyout)
-	cbz	x2, 2f		/* If len == 0 then skip loop */
+	cbz	x2, 1f
 	add	x3, x1, x2
 	ldr	x4, =VM_MAXUSER_ADDRESS
 	cmp	x3, x4
 	b.hi	copyio_fault_nopcb
 
-	adr	x6, copyio_fault /* Get the handler address */
-	SET_FAULT_HANDLER(x6, x7) /* Set the handler */
-
-1:	ldrb	w4, [x0], #1	/* Load from kaddr */
-	strb	w4, [x1], #1	/* Store in uaddr */
-	sub	x2, x2, #1	/* len-- */
-	cbnz	x2, 1b
-
-	SET_FAULT_HANDLER(xzr, x7) /* Clear the handler */
+	b	copycommon
 
-2:	mov	x0, xzr		/* return 0 */
+1:	mov	x0, xzr		/* return 0 */
 	ret
+
 END(copyout)
 
 /*
@@ -77,24 +70,17 @@
  * int copyin(const void *uaddr, void *kdaddr, size_t len)
  */
 ENTRY(copyin)
-	cbz	x2, 2f		/* If len == 0 then skip loop */
+	cbz	x2, 1f
 	add	x3, x0, x2
 	ldr	x4, =VM_MAXUSER_ADDRESS
 	cmp	x3, x4
 	b.hi	copyio_fault_nopcb
 
-	adr	x6, copyio_fault /* Get the handler address */
-	SET_FAULT_HANDLER(x6, x7) /* Set the handler */
+	b	copycommon
 
-1:	ldrb	w4, [x0], #1	/* Load from uaddr */
-	strb	w4, [x1], #1	/* Store in kaddr */
-	sub	x2, x2, #1	/* len-- */
-	cbnz	x2, 1b
-
-	SET_FAULT_HANDLER(xzr, x7) /* Clear the handler */
-
-2:	mov	x0, xzr		/* return 0 */
+1:	mov	x0, xzr		/* return 0 */
 	ret
+
 END(copyin)
 
 /*
@@ -130,3 +116,98 @@
 	csel	w0, wzr, w1, eq	/* If so return success, else failure */
 	ret
 END(copyinstr)
+
+/*
+ * Local helper shared by copyin() and copyout().
+ *
+ * x0 - src pointer
+ * x1 - dst pointer
+ * x2 - size, must be non-zero (both callers branch away when len == 0)
+ * lr - the return address, so jump here instead of calling
+ *
+ * Installs copyio_fault as the fault handler, copies x2 bytes from x0
+ * to x1, clears the handler on every exit path and returns 0 in x0.
+ * Scratch registers used: x3-x10 and x15.
+ */
+	.text
+	.align	4
+	.local	copycommon
+	.type	copycommon,@function
+
+copycommon:
+	adr	x6, copyio_fault /* Get the handler address */
+	SET_FAULT_HANDLER(x6, x7) /* Set the handler */
+
+	/* Take the fast path only when both pointers are 8-byte aligned */
+	orr	x3, x0, x1
+	ands	x3, x3, #0x07
+	b.eq	aligned
+
+	/* Unaligned is byte by byte copy */
+byte_by_byte:
+	ldrb	w3, [x0], #0x01
+	strb	w3, [x1], #0x01
+	subs	x2, x2, #0x01
+	b.ne	byte_by_byte
+	b	6f		/* Must still clear the handler */
+
+aligned:
+	/* x2 is a size_t, so use unsigned conditions */
+	cmp	x2, #0x10
+	b.lo	2f
+	cmp	x2, #0x40
+	b.lo	qqword_by_qword_start
+
+	/* Block copy, 64 bytes per iteration */
+	lsr	x15, x2, #0x06
+qblock:
+	ldp	x3, x4, [x0], #0x10
+	ldp	x5, x6, [x0], #0x10
+	ldp	x7, x8, [x0], #0x10
+	ldp	x9, x10, [x0], #0x10
+	stp	x3, x4, [x1], #0x10
+	stp	x5, x6, [x1], #0x10
+	stp	x7, x8, [x1], #0x10
+	stp	x9, x10, [x1], #0x10
+
+	subs	x15, x15, #0x01
+	b.ne	qblock
+
+	and	x2, x2, #0x3f	/* Bytes left after the 64-byte blocks */
+
+qqword_by_qword_start:
+	lsr	x15, x2, #0x04	/* Number of 16-byte chunks */
+	cbz	x15, 2f
+1:
+	ldp	x3, x4, [x0], #0x10
+	stp	x3, x4, [x1], #0x10
+	subs	x15, x15, #0x01
+	b.ne	1b
+
+	/* Less than 16 bytes to copy, bits 3..0 of x2 select 8/4/2/1 */
+2:
+	tbz	x2, #0x03, 3f
+	ldr	x3, [x0], #0x08
+	str	x3, [x1], #0x08
+
+3:
+	tbz	x2, #0x02, 4f
+	ldr	w3, [x0], #0x04
+	str	w3, [x1], #0x04
+
+4:
+	tbz	x2, #0x01, 5f
+	ldrh	w3, [x0], #0x02
+	strh	w3, [x1], #0x02
+
+5:
+	tbz	x2, #0x00, 6f
+	ldrb	w3, [x0]
+	strb	w3, [x1]
+
+6:
+	SET_FAULT_HANDLER(xzr, x7) /* Clear the handler */
+
+	mov	x0, xzr		/* return 0 */
+	ret
+	.size	copycommon, . - copycommon