Page MenuHomeFreeBSD
Paste P282

Non-inline versions (no explicit SIMD flags)
ActivePublic

Authored by cem on Aug 1 2019, 2:51 PM.
Tags
None
Referenced Files
F4950992: raw.txt
Aug 1 2019, 2:56 PM
F4950834: raw.txt
Aug 1 2019, 2:51 PM
Subscribers
None
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
/*
 * Return true when all six bytes starting at addr are zero.
 *
 * The bytes are OR-folded into one accumulator; the result can only be
 * zero if no byte had any bit set.
 */
bool
is_zero_or(const uint8_t * addr)
{
	uint8_t folded = 0;

	for (int idx = 0; idx < 6; idx++) {
		folded |= addr[idx];
	}
	return (folded == 0);
}
/*
 * Return true when all six bytes starting at addr are 0xff.
 *
 * The bytes are AND-folded into one accumulator that starts with every bit
 * set; any cleared bit in any byte clears it in the result.
 */
bool
is_bcast_and(const uint8_t * addr)
{
	uint8_t folded = 0xff;

	for (int idx = 0; idx < 6; idx++) {
		folded &= addr[idx];
	}
	return (folded == 0xff);
}
/*
 * Return true when all six bytes starting at addr are zero.
 *
 * The bytes are summed in an int, so the total cannot wrap (at most
 * 6 * 0xff) and is zero only when every byte is zero.
 */
bool
is_zero_add(const uint8_t * addr)
{
	const uint8_t *end = addr + 6;
	int total = 0;

	for (const uint8_t *cur = addr; cur != end; cur++) {
		total += *cur;
	}
	return (total == 0);
}
/*
 * Return true when all six bytes starting at addr are 0xff.
 *
 * The bytes are summed in an int; since no byte can exceed 0xff, the total
 * reaches 6 * 0xff only when every byte is 0xff.
 */
bool
is_bcast_add(const uint8_t * addr)
{
	const int nbytes = 6;
	int total = 0;

	for (int idx = 0; idx < nbytes; idx++) {
		total += addr[idx];
	}
	return (total == nbytes * 0xff);
}
========================================================================================
Clang 8.0.0, -O3:
// Add variants are much the same as the inline versions, but OR/AND get SIMD'd (obviously unhelpful for kernel)
0000000000000000 <is_zero_or>:
0: 55 push %rbp
1: 48 89 e5 mov %rsp,%rbp
4: 66 0f 6e 07 movd (%rdi),%xmm0
8: 66 0f 60 c0 punpcklbw %xmm0,%xmm0
c: 66 0f 61 c0 punpcklwd %xmm0,%xmm0
10: 66 0f 70 c8 4e pshufd $0x4e,%xmm0,%xmm1
15: 66 0f eb c8 por %xmm0,%xmm1
19: 66 0f 70 c1 e5 pshufd $0xe5,%xmm1,%xmm0
1e: 66 0f eb c1 por %xmm1,%xmm0
22: 66 0f 7e c0 movd %xmm0,%eax
26: 0a 47 04 or 0x4(%rdi),%al
29: 0a 47 05 or 0x5(%rdi),%al
2c: 0f 94 c0 sete %al
2f: 5d pop %rbp
30: c3 retq
0000000000000040 <is_bcast_and>:
40: 55 push %rbp
41: 48 89 e5 mov %rsp,%rbp
44: 66 0f 6e 07 movd (%rdi),%xmm0
48: 66 0f 60 c0 punpcklbw %xmm0,%xmm0
4c: 66 0f 61 c0 punpcklwd %xmm0,%xmm0
50: 66 0f 70 c8 4e pshufd $0x4e,%xmm0,%xmm1
55: 66 0f db c8 pand %xmm0,%xmm1
59: 66 0f 70 c1 e5 pshufd $0xe5,%xmm1,%xmm0
5e: 66 0f db c1 pand %xmm1,%xmm0
62: 66 0f 7e c0 movd %xmm0,%eax
66: 22 47 04 and 0x4(%rdi),%al
69: 22 47 05 and 0x5(%rdi),%al
6c: 3c ff cmp $0xff,%al
6e: 0f 94 c0 sete %al
71: 5d pop %rbp
72: c3 retq
0000000000000080 <is_zero_add>:
80: 55 push %rbp
81: 48 89 e5 mov %rsp,%rbp
84: 0f b6 07 movzbl (%rdi),%eax
87: 0f b6 4f 01 movzbl 0x1(%rdi),%ecx
8b: 01 c1 add %eax,%ecx
8d: 0f b6 47 02 movzbl 0x2(%rdi),%eax
91: 01 c8 add %ecx,%eax
93: 0f b6 4f 03 movzbl 0x3(%rdi),%ecx
97: 01 c1 add %eax,%ecx
99: 0f b6 47 04 movzbl 0x4(%rdi),%eax
9d: 01 c8 add %ecx,%eax
9f: 0f b6 4f 05 movzbl 0x5(%rdi),%ecx
a3: 01 c1 add %eax,%ecx
a5: 0f 94 c0 sete %al
a8: 5d pop %rbp
a9: c3 retq
00000000000000b0 <is_bcast_add>:
b0: 55 push %rbp
b1: 48 89 e5 mov %rsp,%rbp
b4: 0f b6 07 movzbl (%rdi),%eax
b7: 0f b6 4f 01 movzbl 0x1(%rdi),%ecx
bb: 01 c1 add %eax,%ecx
bd: 0f b6 47 02 movzbl 0x2(%rdi),%eax
c1: 01 c8 add %ecx,%eax
c3: 0f b6 4f 03 movzbl 0x3(%rdi),%ecx
c7: 01 c1 add %eax,%ecx
c9: 0f b6 47 04 movzbl 0x4(%rdi),%eax
cd: 01 c8 add %ecx,%eax
cf: 0f b6 4f 05 movzbl 0x5(%rdi),%ecx
d3: 01 c1 add %eax,%ecx
d5: 81 f9 fa 05 00 00 cmp $0x5fa,%ecx
db: 0f 94 c0 sete %al
de: 5d pop %rbp
df: c3 retq
========================================================================================
Clang 8.0.0, -O3 -mno-sse:
// With SSE disabled, the OR/AND generated code is now a bit smaller than the add variants due to fewer/smaller instructions
0000000000000000 <is_zero_or>:
0: 55 push %rbp
1: 48 89 e5 mov %rsp,%rbp
4: 8a 47 01 mov 0x1(%rdi),%al
7: 0a 07 or (%rdi),%al
9: 0a 47 02 or 0x2(%rdi),%al
c: 0a 47 03 or 0x3(%rdi),%al
f: 0a 47 04 or 0x4(%rdi),%al
12: 0a 47 05 or 0x5(%rdi),%al
15: 0f 94 c0 sete %al
18: 5d pop %rbp
19: c3 retq
0000000000000020 <is_bcast_and>:
20: 55 push %rbp
21: 48 89 e5 mov %rsp,%rbp
24: 8a 47 01 mov 0x1(%rdi),%al
27: 22 07 and (%rdi),%al
29: 22 47 02 and 0x2(%rdi),%al
2c: 22 47 03 and 0x3(%rdi),%al
2f: 22 47 04 and 0x4(%rdi),%al
32: 22 47 05 and 0x5(%rdi),%al
35: 3c ff cmp $0xff,%al
37: 0f 94 c0 sete %al
3a: 5d pop %rbp
3b: c3 retq
========================================================================================
GCC 8.3.0, -O3:
// Basically the same as Clang -O3 -mno-sse, but elides function prologues/epilogues for slightly shorter code.
0000000000000000 <is_zero_or>:
0: 0f b6 07 movzbl (%rdi),%eax
3: 0a 47 01 or 0x1(%rdi),%al
6: 0a 47 02 or 0x2(%rdi),%al
9: 0a 47 03 or 0x3(%rdi),%al
c: 0a 47 04 or 0x4(%rdi),%al
f: 0a 47 05 or 0x5(%rdi),%al
12: 0f 94 c0 sete %al
15: c3 retq
0000000000000020 <is_bcast_and>:
20: 0f b6 07 movzbl (%rdi),%eax
23: 22 47 01 and 0x1(%rdi),%al
26: 22 47 02 and 0x2(%rdi),%al
29: 22 47 03 and 0x3(%rdi),%al
2c: 22 47 04 and 0x4(%rdi),%al
2f: 22 47 05 and 0x5(%rdi),%al
32: 3c ff cmp $0xff,%al
34: 0f 94 c0 sete %al
37: c3 retq