Changeset View
Changeset View
Standalone View
Standalone View
sys/crypto/aesni/aesni_ghash.c
Show First 20 Lines • Show All 498 Lines • ▼ Show 20 Lines | if (nbytes%16) { | ||||
tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); | tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); | ||||
tmp1 = _mm_xor_si128(tmp1, KEY[0]); | tmp1 = _mm_xor_si128(tmp1, KEY[0]); | ||||
for (j=1; j<nr-1; j+=2) { | for (j=1; j<nr-1; j+=2) { | ||||
tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); | tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); | ||||
tmp1 = _mm_aesenc_si128(tmp1, KEY[j+1]); | tmp1 = _mm_aesenc_si128(tmp1, KEY[j+1]); | ||||
} | } | ||||
tmp1 = _mm_aesenc_si128(tmp1, KEY[nr-1]); | tmp1 = _mm_aesenc_si128(tmp1, KEY[nr-1]); | ||||
tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]); | tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]); | ||||
tmp1 = _mm_xor_si128(tmp1, | last_block = _mm_setzero_si128(); | ||||
_mm_loadu_si128(&((const __m128i *)in)[k])); | memcpy(&last_block, &((const __m128i *)in)[k], | ||||
last_block = tmp1; | nbytes % 16); | ||||
last_block ^= tmp1; | |||||
cem: Can we not use `_mm_xor_si128(tmp1, last_block)`? Or do we assume `^` generates the same instruction? | |||||
markj (Author; marked Done): The aesni code does both, but we should stay consistent within this function, so I'll switch to the intrinsic. | |||||
for (j=0; j<nbytes%16; j++) | for (j=0; j<nbytes%16; j++) | ||||
out[k*16+j] = ((unsigned char*)&last_block)[j]; | out[k*16+j] = ((unsigned char*)&last_block)[j]; | ||||
for ((void)j; j<16; j++) | for ((void)j; j<16; j++) | ||||
((unsigned char*)&last_block)[j] = 0; | ((unsigned char*)&last_block)[j] = 0; | ||||
tmp1 = last_block; | tmp1 = last_block; | ||||
tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); | tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); | ||||
X = _mm_xor_si128(X, tmp1); | X = _mm_xor_si128(X, tmp1); | ||||
gfmul(X, H, &X); | gfmul(X, H, &X); | ||||
▲ Show 20 Lines • Show All 291 Lines • Show Last 20 Lines |
Can we not use `_mm_xor_si128(tmp1, last_block)`? Or do we assume `^` generates the same instruction?