diff --git a/tools/test/xregs_sig/c2x2c_aarch64.S b/tools/test/xregs_sig/c2x2c_aarch64.S
--- a/tools/test/xregs_sig/c2x2c_aarch64.S
+++ b/tools/test/xregs_sig/c2x2c_aarch64.S
@@ -7,6 +7,15 @@
 
 	.text
 
+	.globl	xregs_banks_max
+	.type	xregs_banks_max, @function
+xregs_banks_max:
+	mov	x0, #0			/* always reports bank index 0 */
+	ret
+
+	.size	xregs_banks_max, . - xregs_banks_max
+
+
 	.globl	cpu_to_vfp
 	.type	cpu_to_vfp, @function
 cpu_to_vfp:
diff --git a/tools/test/xregs_sig/c2x2c_amd64.S b/tools/test/xregs_sig/c2x2c_amd64.S
--- a/tools/test/xregs_sig/c2x2c_amd64.S
+++ b/tools/test/xregs_sig/c2x2c_amd64.S
@@ -5,8 +5,45 @@
  * $FreeBSD$
  */
 
+#if defined(__FreeBSD__)
+#include <machine/specialreg.h>
+#else
+#define	CPUID2_OSXSAVE		0x08000000
+#define	CPUID2_AVX		0x10000000
+#define	XFEATURE_ENABLED_X87	0x00000001
+#define	XFEATURE_ENABLED_SSE	0x00000002
+#define	XFEATURE_ENABLED_AVX	0x00000004
+#define	XFEATURE_AVX		\
+	(XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE | XFEATURE_ENABLED_AVX)
+#endif
+
 	.text
 
+	.globl	xregs_banks_max
+	.type	xregs_banks_max, @function
+xregs_banks_max:
+	pushq	%rbx			/* cpuid overwrites %rbx */
+	movl	$1, %eax
+	cpuid				/* leaf 1: feature bits in %ecx */
+	andl	$(CPUID2_AVX|CPUID2_OSXSAVE), %ecx
+	cmpl	$(CPUID2_AVX|CPUID2_OSXSAVE), %ecx
+	jne	sse			/* need both AVX and OSXSAVE */
+	xorl	%ecx, %ecx
+	xgetbv				/* XCR0 -> %edx:%eax */
+	andl	$XFEATURE_AVX, %eax
+	cmpl	$XFEATURE_AVX, %eax
+	jne	sse			/* x87, SSE and AVX must all be enabled */
+	movl	$1, %eax		/* return 1: the AVX bank is usable */
+	jmp	out
+sse:
+	xorl	%eax, %eax		/* return 0: only the first (xmm) bank */
+out:
+	popq	%rbx
+	retq
+
+	.size	xregs_banks_max, . - xregs_banks_max
+
+
 	.globl	cpu_to_xmm
 	.type	cpu_to_xmm, @function
 cpu_to_xmm:
@@ -54,4 +91,52 @@
 
 	.size	xmm_to_cpu, . - xmm_to_cpu
 
+
+	.globl	cpu_to_avx
+	.type	cpu_to_avx, @function
+cpu_to_avx:
+	vmovdqu	%ymm0, (%rdi)		/* store ymm0..ymm15, 32 bytes each */
+	vmovdqu	%ymm1, 1 * 32(%rdi)
+	vmovdqu	%ymm2, 2 * 32(%rdi)
+	vmovdqu	%ymm3, 3 * 32(%rdi)
+	vmovdqu	%ymm4, 4 * 32(%rdi)
+	vmovdqu	%ymm5, 5 * 32(%rdi)
+	vmovdqu	%ymm6, 6 * 32(%rdi)
+	vmovdqu	%ymm7, 7 * 32(%rdi)
+	vmovdqu	%ymm8, 8 * 32(%rdi)
+	vmovdqu	%ymm9, 9 * 32(%rdi)
+	vmovdqu	%ymm10, 10 * 32(%rdi)
+	vmovdqu	%ymm11, 11 * 32(%rdi)
+	vmovdqu	%ymm12, 12 * 32(%rdi)
+	vmovdqu	%ymm13, 13 * 32(%rdi)
+	vmovdqu	%ymm14, 14 * 32(%rdi)
+	vmovdqu	%ymm15, 15 * 32(%rdi)
+	retq
+
+	.size	cpu_to_avx, . - cpu_to_avx
+
+
+	.globl	avx_to_cpu
+	.type	avx_to_cpu, @function
+avx_to_cpu:
+	vmovdqu	(%rdi), %ymm0		/* load ymm0..ymm15, 32 bytes each */
+	vmovdqu	1 * 32(%rdi), %ymm1
+	vmovdqu	2 * 32(%rdi), %ymm2
+	vmovdqu	3 * 32(%rdi), %ymm3
+	vmovdqu	4 * 32(%rdi), %ymm4
+	vmovdqu	5 * 32(%rdi), %ymm5
+	vmovdqu	6 * 32(%rdi), %ymm6
+	vmovdqu	7 * 32(%rdi), %ymm7
+	vmovdqu	8 * 32(%rdi), %ymm8
+	vmovdqu	9 * 32(%rdi), %ymm9
+	vmovdqu	10 * 32(%rdi), %ymm10
+	vmovdqu	11 * 32(%rdi), %ymm11
+	vmovdqu	12 * 32(%rdi), %ymm12
+	vmovdqu	13 * 32(%rdi), %ymm13
+	vmovdqu	14 * 32(%rdi), %ymm14
+	vmovdqu	15 * 32(%rdi), %ymm15
+	retq
+
+	.size	avx_to_cpu, . - avx_to_cpu
+
 	.section	.note.GNU-stack,"",@progbits
diff --git a/tools/test/xregs_sig/xregs_sig.c b/tools/test/xregs_sig/xregs_sig.c
--- a/tools/test/xregs_sig/xregs_sig.c
+++ b/tools/test/xregs_sig/xregs_sig.c
@@ -54,9 +54,13 @@
 	void		(*c2x)(uint8_t *);
 };
 
+int xregs_banks_max(void);	/* defined in the per-arch c2x2c_*.S */
+
 #if defined(__amd64__)
 void cpu_to_xmm(uint8_t *);
 void xmm_to_cpu(uint8_t *);
+void cpu_to_avx(uint8_t *);
+void avx_to_cpu(uint8_t *);
 
 static const struct xregs_bank xregs_banks[] = {
 	{
@@ -67,6 +71,14 @@
 		.x2c	= xmm_to_cpu,
 		.c2x	= cpu_to_xmm,
 	},
+	{
+		.b_name	= "AVX",
+		.r_name	= "ymm",
+		.regs	= 16,
+		.bytes	= 32,
+		.x2c	= avx_to_cpu,
+		.c2x	= cpu_to_avx,
+	},
 };
 #elif defined(__aarch64__)
 void cpu_to_vfp(uint8_t *);
@@ -199,7 +211,7 @@
 	struct sigaction sa;
 	int error, i, ncpu, bank;
 
-	max_bank_idx = 0;
+	max_bank_idx = xregs_banks_max();
 
 	bzero(&sa, sizeof(sa));
 	sa.sa_handler = sigalrm_handler;