diff --git a/tools/test/xregs_via_sig/c2x2c_amd64.S b/tools/test/xregs_via_sig/c2x2c_amd64.S
--- a/tools/test/xregs_via_sig/c2x2c_amd64.S
+++ b/tools/test/xregs_via_sig/c2x2c_amd64.S
@@ -5,9 +5,45 @@
  * $FreeBSD$
  */
 
+#include <machine/specialreg.h>
+
+/*
+ * Report which register bank the copy routines below should use.
+ * Returns 1 when CPUID leaf 1 advertises both AVX and OSXSAVE and XCR0
+ * has every XFEATURE_AVX bit set (the %ymm path); returns 0 otherwise
+ * (the %xmm path).
+ */
+	.global	xregs_bank
+	.type	xregs_bank, @function
+xregs_bank:
+	/* cpuid overwrites %ebx; %rbx is callee-saved, so preserve it. */
+	pushq	%rbx
+	movl	$1, %eax
+	cpuid
+	popq	%rbx
+	andl	$(CPUID2_AVX|CPUID2_OSXSAVE), %ecx
+	cmpl	$(CPUID2_AVX|CPUID2_OSXSAVE), %ecx
+	jne	sse
+	xorl	%ecx, %ecx
+	xgetbv
+	andl	$XFEATURE_AVX, %eax
+	cmpl	$XFEATURE_AVX, %eax
+	jne	sse
+	movq	$1, %rax
+	retq
+sse:
+	xor	%rax, %rax
+	retq
+
+	.size	xregs_bank, . - xregs_bank
+
+
 	.global	cpu_to_xregs
 	.type	cpu_to_xregs, @function
 cpu_to_xregs:
+	call	xregs_bank
+	cmpl	$1, %eax
+	je	cpu_to_avx
 	movdqu	%xmm0, (%rdi)
 	movdqu	%xmm1, 1 * 16(%rdi)
 	movdqu	%xmm2, 2 * 16(%rdi)
@@ -26,12 +62,35 @@
 	movdqu	%xmm15, 15 * 16(%rdi)
 	retq
 
+/* AVX path: store the full 256-bit %ymm0-%ymm15, 32 bytes apart. */
+cpu_to_avx:
+	vmovdqu	%ymm0, (%rdi)
+	vmovdqu	%ymm1, 1 * 32(%rdi)
+	vmovdqu	%ymm2, 2 * 32(%rdi)
+	vmovdqu	%ymm3, 3 * 32(%rdi)
+	vmovdqu	%ymm4, 4 * 32(%rdi)
+	vmovdqu	%ymm5, 5 * 32(%rdi)
+	vmovdqu	%ymm6, 6 * 32(%rdi)
+	vmovdqu	%ymm7, 7 * 32(%rdi)
+	vmovdqu	%ymm8, 8 * 32(%rdi)
+	vmovdqu	%ymm9, 9 * 32(%rdi)
+	vmovdqu	%ymm10, 10 * 32(%rdi)
+	vmovdqu	%ymm11, 11 * 32(%rdi)
+	vmovdqu	%ymm12, 12 * 32(%rdi)
+	vmovdqu	%ymm13, 13 * 32(%rdi)
+	vmovdqu	%ymm14, 14 * 32(%rdi)
+	vmovdqu	%ymm15, 15 * 32(%rdi)
+	retq
+
 	.size	cpu_to_xregs, . - cpu_to_xregs
 
 
 	.global	xregs_to_cpu
 	.type	xregs_to_cpu, @function
 xregs_to_cpu:
+	call	xregs_bank
+	cmpl	$1, %eax
+	je	avx_to_cpu
 	movdqu	(%rdi), %xmm0
 	movdqu	1 * 16(%rdi), %xmm1
 	movdqu	2 * 16(%rdi), %xmm2
@@ -50,6 +109,26 @@
 	movdqu	15 * 16(%rdi), %xmm15
 	retq
 
+/* AVX path: load the full 256-bit %ymm0-%ymm15, 32 bytes apart. */
+avx_to_cpu:
+	vmovdqu	(%rdi), %ymm0
+	vmovdqu	1 * 32(%rdi), %ymm1
+	vmovdqu	2 * 32(%rdi), %ymm2
+	vmovdqu	3 * 32(%rdi), %ymm3
+	vmovdqu	4 * 32(%rdi), %ymm4
+	vmovdqu	5 * 32(%rdi), %ymm5
+	vmovdqu	6 * 32(%rdi), %ymm6
+	vmovdqu	7 * 32(%rdi), %ymm7
+	vmovdqu	8 * 32(%rdi), %ymm8
+	vmovdqu	9 * 32(%rdi), %ymm9
+	vmovdqu	10 * 32(%rdi), %ymm10
+	vmovdqu	11 * 32(%rdi), %ymm11
+	vmovdqu	12 * 32(%rdi), %ymm12
+	vmovdqu	13 * 32(%rdi), %ymm13
+	vmovdqu	14 * 32(%rdi), %ymm14
+	vmovdqu	15 * 32(%rdi), %ymm15
+	retq
+
 	.size	xregs_to_cpu, . - xregs_to_cpu
 
 	.section	.note.GNU-stack,"",@progbits
diff --git a/tools/test/xregs_via_sig/xregs_via_sig.c b/tools/test/xregs_via_sig/xregs_via_sig.c
--- a/tools/test/xregs_via_sig/xregs_via_sig.c
+++ b/tools/test/xregs_via_sig/xregs_via_sig.c
@@ -49,15 +49,25 @@
 #endif
 
 #if defined(__amd64__)
-#define	XREGSRNAM	"xmm"
 #define	NREGS		16
+#define	NBYTES		32
+
+static const char *xregs_names[] = {
+	"xmm",
+	"ymm",
+};
+
 #elif defined(__aarch64__)
 #define	XREGSRNAM	"q"
 #define	NREGS		32
+#define	NBYTES		16
+#endif
+#if !defined(XREGSRNAM)
+int xregs_bank(void);
 #endif
 
 struct xregsreg {
-	uint8_t xregs_bytes[16];
+	uint8_t xregs_bytes[NBYTES];
 };
 
 struct xregs {
@@ -69,6 +79,25 @@
 
 static atomic_uint sigs;
 
+/*
+ * Name prefix of the vector registers being compared.  When XREGSRNAM
+ * is not defined (the amd64 case) it is picked at run time from
+ * xregs_names[] using xregs_bank(); otherwise it is XREGSRNAM.
+ * NOTE(review): the SSE store path uses a 16-byte stride while NBYTES
+ * is 32 on amd64, so the index printed next to "xmm" may not match the
+ * register number - confirm.
+ */
+static const char *
+xregs_name(void)
+{
+#if !defined(XREGSRNAM)
+	return (xregs_names[xregs_bank()]);
+#else
+	return (XREGSRNAM);
+#endif
+}
+
+
 static void
 sigusr1_handler(int sig __unused, siginfo_t *si __unused, void *m __unused)
 {
@@ -126,7 +155,7 @@
 		r2 = &xregs2->xregsreg[i];
 		for (j = 0; j < nitems(r1->xregs_bytes); j++) {
 			if (r1->xregs_bytes[j] != r2->xregs_bytes[j]) {
-				printf("%%%s%u\n", XREGSRNAM, i);
+				printf("%%%s%u\n", xregs_name(), i);
 				dump_xregs(r1);
 				dump_xregs(r2);
 				break;