diff --git a/tools/test/xregs_sig/c2x2c_aarch64.S b/tools/test/xregs_sig/c2x2c_aarch64.S --- a/tools/test/xregs_sig/c2x2c_aarch64.S +++ b/tools/test/xregs_sig/c2x2c_aarch64.S @@ -5,9 +5,11 @@ * $FreeBSD$ */ - .global cpu_to_xregs - .type cpu_to_xregs, @function -cpu_to_xregs: + .text + + .globl cpu_to_vfp + .type cpu_to_vfp, @function +cpu_to_vfp: stp q0, q1, [x0, #( 0 * 16)] stp q2, q3, [x0, #( 2 * 16)] stp q4, q5, [x0, #( 4 * 16)] @@ -26,12 +28,12 @@ stp q30, q31, [x0, #(30 * 16)] ret - .size cpu_to_xregs, . - cpu_to_xregs + .size cpu_to_vfp, . - cpu_to_vfp - .global xregs_to_cpu - .type xregs_to_cpu, @function -xregs_to_cpu: + .globl vfp_to_cpu + .type vfp_to_cpu, @function +vfp_to_cpu: ldp q0, q1, [x0, #( 0 * 16)] ldp q2, q3, [x0, #( 2 * 16)] ldp q4, q5, [x0, #( 4 * 16)] @@ -50,6 +52,6 @@ ldp q30, q31, [x0, #(30 * 16)] ret - .size xregs_to_cpu, . - xregs_to_cpu + .size vfp_to_cpu, . - vfp_to_cpu .section .note.GNU-stack,"",@progbits diff --git a/tools/test/xregs_sig/c2x2c_amd64.S b/tools/test/xregs_sig/c2x2c_amd64.S --- a/tools/test/xregs_sig/c2x2c_amd64.S +++ b/tools/test/xregs_sig/c2x2c_amd64.S @@ -5,9 +5,11 @@ * $FreeBSD$ */ - .global cpu_to_xregs - .type cpu_to_xregs, @function -cpu_to_xregs: + .text + + .globl cpu_to_xmm + .type cpu_to_xmm, @function +cpu_to_xmm: movdqu %xmm0, (%rdi) movdqu %xmm1, 1 * 16(%rdi) movdqu %xmm2, 2 * 16(%rdi) @@ -26,12 +28,12 @@ movdqu %xmm15, 15 * 16(%rdi) retq - .size cpu_to_xregs, . - cpu_to_xregs + .size cpu_to_xmm, . - cpu_to_xmm - .global xregs_to_cpu - .type xregs_to_cpu, @function -xregs_to_cpu: + .globl xmm_to_cpu + .type xmm_to_cpu, @function +xmm_to_cpu: movdqu (%rdi), %xmm0 movdqu 1 * 16(%rdi), %xmm1 movdqu 2 * 16(%rdi), %xmm2 @@ -50,6 +52,6 @@ movdqu 15 * 16(%rdi), %xmm15 retq - .size xregs_to_cpu, . - xregs_to_cpu + .size xmm_to_cpu, . - xmm_to_cpu .section .note.GNU-stack,"",@progbits diff --git a/tools/test/xregs_sig/xregs_sig.c b/tools/test/xregs_sig/xregs_sig.c --- a/tools/test/xregs_sig/xregs_sig.c +++ b/tools/test/xregs_sig/xregs_sig.c @@ -44,30 +44,49 @@ #ifndef __unused #define __unused __attribute__((__unused__)) #endif -#ifndef nitems -#define nitems(x) (sizeof((x)) / sizeof((x)[0])) -#endif -#if defined(__amd64__) -#define XREGSRNAM "xmm" -#define NREGS 16 -#elif defined(__aarch64__) -#define XREGSRNAM "q" -#define NREGS 32 -#endif - -struct xregsreg { - uint8_t xregs_bytes[16]; +struct xregs_bank { + const char *b_name; + const char *r_name; + uint32_t regs; + uint32_t bytes; + void (*x2c)(uint8_t *); + void (*c2x)(uint8_t *); }; -struct xregs { - struct xregsreg xregsreg[NREGS]; +#if defined(__amd64__) +void cpu_to_xmm(uint8_t *); +void xmm_to_cpu(uint8_t *); + +static const struct xregs_bank xregs_banks[] = { + { + .b_name = "SSE", + .r_name = "xmm", + .regs = 16, + .bytes = 16, + .x2c = xmm_to_cpu, + .c2x = cpu_to_xmm, + }, }; - -void cpu_to_xregs(struct xregs *xregs); -void xregs_to_cpu(struct xregs *xregs); +#elif defined(__aarch64__) +void cpu_to_vfp(uint8_t *); +void vfp_to_cpu(uint8_t *); + +static const struct xregs_bank xregs_banks[] = { + { + .b_name = "VFP", + .r_name = "q", + .regs = 32, + .bytes = 16, + .x2c = vfp_to_cpu, + .c2x = cpu_to_vfp, + }, +}; +#endif static atomic_uint sigs; +static int max_bank_idx; + static void sigusr1_handler(int sig __unused, siginfo_t *si __unused, void *m __unused) @@ -87,23 +106,22 @@ alarm(TIMO); } -static struct xregs zero_xregs = {}; static void -fill_xregs(struct xregs *xregs) +fill_xregs(uint8_t *xregs, int bank) { - arc4random_buf(xregs, sizeof(*xregs)); + arc4random_buf(xregs, xregs_banks[bank].regs * xregs_banks[bank].bytes); } static void -dump_xregs(const struct xregsreg *r) +dump_xregs(const uint8_t *r, int bank) { unsigned k; - for (k = 0; k < nitems(r->xregs_bytes); k++) { + for (k = 0; k < xregs_banks[bank].bytes; k++) { if (k != 0) printf(" "); - printf("%02x", r->xregs_bytes[k]); + printf("%02x", r[k]); } printf("\n"); } @@ -111,9 +129,9 @@ static pthread_mutex_t show_lock; static void -show_diff(const struct xregs *xregs1, const struct xregs *xregs2) +show_diff(const uint8_t *xregs1, const uint8_t *xregs2, int bank) { - const struct xregsreg *r1, *r2; + const uint8_t *r1, *r2; unsigned i, j; #if defined(__FreeBSD__) @@ -121,14 +139,14 @@ #elif defined(__linux__) printf("thr %ld\n", syscall(SYS_gettid)); #endif - for (i = 0; i < nitems(xregs1->xregsreg); i++) { - r1 = &xregs1->xregsreg[i]; - r2 = &xregs2->xregsreg[i]; - for (j = 0; j < nitems(r1->xregs_bytes); j++) { - if (r1->xregs_bytes[j] != r2->xregs_bytes[j]) { - printf("%%%s%u\n", XREGSRNAM, i); - dump_xregs(r1); - dump_xregs(r2); + for (i = 0; i < xregs_banks[bank].regs; i++) { + r1 = xregs1 + i * xregs_banks[bank].bytes; + r2 = xregs2 + i * xregs_banks[bank].bytes; + for (j = 0; j < xregs_banks[bank].bytes; j++) { + if (r1[j] != r2[j]) { + printf("%%%s%u\n", xregs_banks[bank].r_name, i); + dump_xregs(r1, bank); + dump_xregs(r2, bank); break; } } @@ -142,28 +160,32 @@ } static void * -worker_thread(void *arg __unused) +worker_thread(void *arg) { - struct xregs xregs, xregs_cpu; + int bank = (uintptr_t)arg; + int sz = xregs_banks[bank].regs * xregs_banks[bank].bytes; + uint8_t xregs[sz], xregs_cpu[sz], zero_xregs[sz]; + + memset(zero_xregs, 0, sz); - fill_xregs(&xregs); + fill_xregs(xregs, bank); for (;;) { - xregs_to_cpu(&xregs); + xregs_banks[bank].x2c(xregs); my_pause(); - cpu_to_xregs(&xregs_cpu); - if (memcmp(&xregs, &xregs_cpu, sizeof(struct xregs)) != 0) { + xregs_banks[bank].c2x(xregs_cpu); + if (memcmp(xregs, xregs_cpu, sz) != 0) { pthread_mutex_lock(&show_lock); - show_diff(&xregs, &xregs_cpu); + show_diff(xregs, xregs_cpu, bank); abort(); pthread_mutex_unlock(&show_lock); } - xregs_to_cpu(&zero_xregs); + xregs_banks[bank].x2c(zero_xregs); my_pause(); - cpu_to_xregs(&xregs_cpu); - if (memcmp(&zero_xregs, &xregs_cpu, sizeof(struct xregs)) != 0) { + xregs_banks[bank].c2x(xregs_cpu); + if (memcmp(zero_xregs, xregs_cpu, sz) != 0) { pthread_mutex_lock(&show_lock); - show_diff(&zero_xregs, &xregs_cpu); + show_diff(zero_xregs, xregs_cpu, bank); abort(); pthread_mutex_unlock(&show_lock); } @@ -175,7 +197,9 @@ main(void) { struct sigaction sa; - int error, i, ncpu; + int error, i, ncpu, bank; + + max_bank_idx = 0; bzero(&sa, sizeof(sa)); sa.sa_handler = sigalrm_handler; @@ -199,14 +223,22 @@ } ncpu = sysconf(_SC_NPROCESSORS_ONLN); - ncpu *= 2; + if (max_bank_idx == 0) + ncpu *= 2; + bank = 0; pthread_t wt[ncpu]; +nextbank: + printf("Starting %d threads for registers bank %s sized [%d][%d]\n", ncpu, + xregs_banks[bank].b_name, xregs_banks[bank].regs, xregs_banks[bank].bytes); for (i = 0; i < ncpu; i++) { - error = pthread_create(&wt[i], NULL, worker_thread, NULL); + error = pthread_create(&wt[i], NULL, worker_thread, + (void *)(uintptr_t)bank); if (error != 0) { fprintf(stderr, "pthread_create %s\n", strerror(error)); } } + if (++bank <= max_bank_idx) + goto nextbank; alarm(TIMO); for (;;) {