Index: lib/libc/powerpc64/string/bcopy.S
===================================================================
--- lib/libc/powerpc64/string/bcopy.S
+++ lib/libc/powerpc64/string/bcopy.S
@@ -34,6 +34,11 @@
 #define BLOCK_SIZE	(1 << BLOCK_SIZE_BITS)
 #define BLOCK_SIZE_MASK	(BLOCK_SIZE - 1)
 
+/* Minimum 8-byte alignment, to avoid cache-inhibited alignment faults. */
+#ifndef ALIGN_MASK
+#define ALIGN_MASK	0x7
+#endif
+
 #define MULTI_PHASE_THRESHOLD	512
 
 #ifndef FN_NAME
@@ -67,8 +72,14 @@
 #endif
 
 	cmpldi	%r5, MULTI_PHASE_THRESHOLD
-	bge	.Lmulti_phase
-
+	blt	.Lavoid_vsx
+	/* Check if src and dst are relatively aligned; if so, use multi_phase. */
+	andi.	%r8, %r3, ALIGN_MASK
+	andi.	%r7, %r4, ALIGN_MASK
+	xor.	%r6, %r7, %r8	/* compare alignments; keep the result in %r6 */
+	beq	.Lmulti_phase
+
+.Lavoid_vsx:
 	/* align src */
 	cmpd	%r4, %r3	/* forward or backward copy? */
 	blt	.Lbackward_align
@@ -106,6 +117,10 @@
 	li	%r0, 1
 	li	%r8, 16
 	li	%r9, 0
+	/* Check again whether src and dst were relatively aligned. */
+	cmpdi	%r6, 0
+	bne	.Lunaligned
+	/* If they were, continue as normal. */
 	b	.Lsingle_phase
 
 .Lbackward_single_copy:
@@ -116,6 +131,10 @@
 	/* point src and dst to last byte */
 	addi	%r3, %r3, -1
 	addi	%r4, %r4, -1
+	/* Check again whether src and dst were relatively aligned. */
+	cmpdi	%r6, 0
+	bne	.Lunaligned
+	/* If they were, continue as normal. */
 
 .Lsingle_phase:
 	srdi.	%r6, %r5, 4	/* number of 16-bytes */
@@ -145,6 +164,11 @@
 	beq	.Ldone	/* 1-bytes == 0? done */
 	mtctr	%r6
+	b	.Lsingle_1_loop
+
+.Lunaligned:
+	/* Copy every byte with single_1_loop. */
+	mtctr	%r5
 	.align 5
.Lsingle_1_loop:
 	lbz	%r6, 0(%r4)
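
For orientation, below is a rough C model (not part of the patch) of the dispatch the added instructions implement: take the VSX multi-phase path only when the copy is large and src/dst share the same offset within an 8-byte word, otherwise fall back to the scalar paths, using the byte-by-byte loop when the buffers are relatively misaligned. The function and enum names (`pick_path`, `copy_path`) and the test in `main` are hypothetical, and the model computes the alignment bits for every size, whereas the assembly only computes them on the large-copy path; register-level details differ.

```c
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Mirror the constants used by the assembly. */
#define ALIGN_MASK		0x7
#define MULTI_PHASE_THRESHOLD	512

enum copy_path { SINGLE_PHASE, MULTI_PHASE, BYTE_LOOP };

static enum copy_path
pick_path(const void *dst, const void *src, size_t len)
{
	uintptr_t d = (uintptr_t)dst & ALIGN_MASK;	/* andi. %r8, %r3, ALIGN_MASK */
	uintptr_t s = (uintptr_t)src & ALIGN_MASK;	/* andi. %r7, %r4, ALIGN_MASK */

	/* Large and relatively aligned: VSX multi-phase copy. */
	if (len >= MULTI_PHASE_THRESHOLD && (d ^ s) == 0)
		return (MULTI_PHASE);		/* beq .Lmulti_phase */
	/* Relatively misaligned: copy one byte at a time. */
	if ((d ^ s) != 0)
		return (BYTE_LOOP);		/* bne .Lunaligned */
	/* Otherwise the existing 16/8/1-byte single-phase copy. */
	return (SINGLE_PHASE);
}

int
main(void)
{
	char buf[1024];

	/* Same offset within an 8-byte word, large copy: multi-phase. */
	printf("%d\n", pick_path(buf, buf + 8, sizeof(buf) - 8));
	/* Different offsets: byte loop, so no alignment faults can occur. */
	printf("%d\n", pick_path(buf, buf + 3, sizeof(buf) - 3));
	return (0);
}
```

Checking the XOR of the masked low bits, rather than requiring both pointers to be absolutely 8-byte aligned, appears sufficient here because the existing multi-phase code first aligns src; when dst shares the same offset, that step leaves dst aligned as well.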