Changeset View
Changeset View
Standalone View
Standalone View
sys/i386/i386/in_cksum_machdep.c
Show First 20 Lines • Show All 50 Lines • ▼ Show 20 Lines | |||||
* This routine is very heavily used in the network | * This routine is very heavily used in the network | ||||
* code and should be modified for each CPU to be as fast as possible. | * code and should be modified for each CPU to be as fast as possible. | ||||
* | * | ||||
* This implementation is 386 version. | * This implementation is 386 version. | ||||
*/ | */ | ||||
#undef ADDCARRY | #undef ADDCARRY | ||||
#define ADDCARRY(x) if ((x) > 0xffff) (x) -= 0xffff | #define ADDCARRY(x) if ((x) > 0xffff) (x) -= 0xffff | ||||
/* | |||||
* icc needs to be special cased here, as the asm code below results | |||||
* in broken code if compiled with icc. | |||||
*/ | |||||
#if !defined(__GNUCLIKE_ASM) | |||||
/* non gcc parts stolen from sys/alpha/alpha/in_cksum.c */
/*
 * Fold the 64-bit accumulator `sum' by adding its four 16-bit halfwords
 * through the q_util overlay.  The result fits in 32 bits (at most
 * 4 * 0xffff).  Requires `union q_util q_util' and `u_int64_t sum'
 * in the enclosing scope.
 */
#define REDUCE32 \
{ \
	q_util.q = sum; \
	sum = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \
}
/*
 * Fold `sum' all the way down to a 16-bit one's-complement sum:
 * 64 -> 32 bits via q_util, 32 -> 16 bits via l_util, then a final
 * end-around carry (ADDCARRY).  Additionally requires
 * `union l_util l_util' in the enclosing scope.
 */
#define REDUCE16 \
{ \
	q_util.q = sum; \
	l_util.l = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \
	sum = l_util.s[0] + l_util.s[1]; \
	ADDCARRY(sum); \
}
#endif | |||||
#define REDUCE {sum = (sum & 0xffff) + (sum >> 16); ADDCARRY(sum);} | #define REDUCE {sum = (sum & 0xffff) + (sum >> 16); ADDCARRY(sum);} | ||||
#if !defined(__GNUCLIKE_ASM) | |||||
/*
 * Masks that zero out the bytes of a 32-bit word that do not belong to
 * the buffer.  Indexed as in_masks[(offset << 2) + nbytes]: the row is
 * the byte offset of the buffer within the word, the column is the
 * number of valid bytes (clamped to what fits after the offset, hence
 * the repeated entries in rows 2 and 3).
 */
static const u_int32_t in_masks[] = {
	/*0 bytes*/ /*1 byte*/	/*2 bytes*/ /*3 bytes*/
	0x00000000, 0x000000FF, 0x0000FFFF, 0x00FFFFFF,	/* offset 0 */
	0x00000000, 0x0000FF00, 0x00FFFF00, 0xFFFFFF00,	/* offset 1 */
	0x00000000, 0x00FF0000, 0xFFFF0000, 0xFFFF0000,	/* offset 2 */
	0x00000000, 0xFF000000, 0xFF000000, 0xFF000000,	/* offset 3 */
};

/* Overlay to split a 32-bit value into two 16-bit halfwords. */
union l_util {
	u_int16_t s[2];
	u_int32_t l;
};

/* Overlay to split a 64-bit accumulator into 16- or 32-bit pieces. */
union q_util {
	u_int16_t s[4];
	u_int32_t l[2];
	u_int64_t q;
};
/*
 * Sum `len' bytes starting at `lw' as 32-bit words into a 64-bit
 * accumulator and fold it with REDUCE32.  The returned value is a
 * partial one's-complement sum; callers finish with REDUCE16/ADDCARRY.
 * Leading/trailing bytes that do not fill a whole word are handled by
 * masking via in_masks[].
 *
 * NOTE(review): when `lw' is misaligned, the partial sum is computed as
 * if the buffer started on the preceding word boundary, i.e. byte-swap
 * parity follows the address — in_cksum_skip() compensates with its
 * `(clen ^ addr) & 1' shift below.
 */
static u_int64_t
in_cksumdata(const u_int32_t *lw, int len)
{
	u_int64_t sum = 0;
	u_int64_t prefilled;
	int offset;
	union q_util q_util;

	/* Fast path: word-aligned 20-byte buffer (option-less IPv4 header). */
	if ((3 & (long) lw) == 0 && len == 20) {
		sum = (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3] + lw[4];
		REDUCE32;
		return sum;
	}

	/*
	 * Misaligned start: back the pointer up to the previous word
	 * boundary and mask off the leading bytes that are not part of
	 * the buffer.  The column index is clamped to 3 since at most
	 * 3 buffer bytes can live in the first word.
	 */
	if ((offset = 3 & (long) lw) != 0) {
		const u_int32_t *masks = in_masks + (offset << 2);
		lw = (u_int32_t *) (((long) lw) - offset);
		sum = *lw++ & masks[len >= 3 ? 3 : len];
		len -= 4 - offset;
		if (len <= 0) {
			REDUCE32;
			return sum;
		}
	}
#if 0
	/*
	 * Force to cache line boundary.
	 */
	offset = 32 - (0x1f & (long) lw);
	if (offset < 32 && len > offset) {
		len -= offset;
		if (4 & offset) {
			sum += (u_int64_t) lw[0];
			lw += 1;
		}
		if (8 & offset) {
			sum += (u_int64_t) lw[0] + lw[1];
			lw += 2;
		}
		if (16 & offset) {
			sum += (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3];
			lw += 4;
		}
	}
#endif
	/*
	 * access prefilling to start load of next cache line.
	 * then add current cache line
	 * save result of prefilling for loop iteration.
	 */
	prefilled = lw[0];
	while ((len -= 32) >= 4) {
		u_int64_t prefilling = lw[8];
		sum += prefilled + lw[1] + lw[2] + lw[3]
			+ lw[4] + lw[5] + lw[6] + lw[7];
		lw += 8;
		prefilled = prefilling;
	}
	/* Final 32-byte chunk (lw[0] was already fetched into prefilled). */
	if (len >= 0) {
		sum += prefilled + lw[1] + lw[2] + lw[3]
			+ lw[4] + lw[5] + lw[6] + lw[7];
		lw += 8;
	} else {
		len += 32;
	}
	/* Remaining 16-byte chunks. */
	while ((len -= 16) >= 0) {
		sum += (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3];
		lw += 4;
	}
	len += 16;
	/* Remaining whole words. */
	while ((len -= 4) >= 0) {
		sum += (u_int64_t) *lw++;
	}
	len += 4;
	/* Trailing 1-3 bytes, masked from the next word. */
	if (len > 0)
		sum += (u_int64_t) (in_masks[len] & *lw);
	REDUCE32;
	return sum;
}
/*
 * Add two 16-bit one's-complement values, performing the end-around
 * carry, and return the 16-bit result.
 */
u_short
in_addword(u_short a, u_short b)
{
	u_int64_t total;

	total = (u_int64_t)a + b;
	/* End-around carry (open-coded ADDCARRY). */
	if (total > 0xffff)
		total -= 0xffff;
	return (total);
}
/*
 * Sum the three 32-bit pseudo-header words and fold the result to a
 * 16-bit one's-complement sum.  The fold is the REDUCE16 operation
 * written out with shifts: adding the four 16-bit halfwords of the
 * 64-bit total is order-independent, so this is byte-order neutral.
 */
u_short
in_pseudo(u_int32_t a, u_int32_t b, u_int32_t c)
{
	u_int64_t total;
	u_int32_t folded;

	total = (u_int64_t)a + b + c;
	/* 64 -> 32: add the four 16-bit halfwords. */
	folded = (u_int32_t)(total & 0xffff) +
	    (u_int32_t)((total >> 16) & 0xffff) +
	    (u_int32_t)((total >> 32) & 0xffff) +
	    (u_int32_t)(total >> 48);
	/* 32 -> 16 plus end-around carry. */
	total = (folded & 0xffff) + (folded >> 16);
	if (total > 0xffff)
		total -= 0xffff;
	return (total);
}
/*
 * Compute the 16-bit one's-complement checksum of `len' bytes of the
 * mbuf chain `m', starting `skip' bytes into the chain, and return its
 * complement (ready to be stored in a header checksum field).
 */
u_short
in_cksum_skip(struct mbuf *m, int len, int skip)
{
	u_int64_t sum = 0;
	int mlen = 0;
	int clen = 0;	/* bytes checksummed so far; tracks byte parity */
	caddr_t addr;
	union q_util q_util;
	union l_util l_util;

	len -= skip;
	/* Advance past the first `skip' bytes of the chain. */
	for (; skip && m; m = m->m_next) {
		if (m->m_len > skip) {
			mlen = m->m_len - skip;
			addr = mtod(m, caddr_t) + skip;
			goto skip_start;
		} else {
			skip -= m->m_len;
		}
	}

	for (; m && len; m = m->m_next) {
		if (m->m_len == 0)
			continue;
		mlen = m->m_len;
		addr = mtod(m, caddr_t);
skip_start:
		if (len < mlen)
			mlen = len;
		/*
		 * in_cksumdata()'s partial sum follows the parity of the
		 * data's address.  When that parity differs from the
		 * parity of the running byte count, byte-swap the partial
		 * sum by shifting left 8; REDUCE16 below folds the carry
		 * back in (one's-complement sums commute with byte swaps).
		 */
		if ((clen ^ (long) addr) & 1)
			sum += in_cksumdata((const u_int32_t *)addr, mlen) << 8;
		else
			sum += in_cksumdata((const u_int32_t *)addr, mlen);

		clen += mlen;
		len -= mlen;
	}
	REDUCE16;
	return (~sum & 0xffff);
}
u_int in_cksum_hdr(const struct ip *ip) | |||||
{ | |||||
u_int64_t sum = in_cksumdata((const u_int32_t *)ip, sizeof(struct ip)); | |||||
union q_util q_util; | |||||
union l_util l_util; | |||||
REDUCE16; | |||||
return (~sum & 0xffff); | |||||
} | |||||
#else | |||||
/* | |||||
* These asm statements require __volatile because they pass information | * These asm statements require __volatile because they pass information | ||||
* via the condition codes. GCC does not currently provide a way to specify | * via the condition codes. GCC does not currently provide a way to specify | ||||
* the condition codes as an input or output operand. | * the condition codes as an input or output operand. | ||||
* | * | ||||
* The LOAD macro below is effectively a prefetch into cache. GCC will | * The LOAD macro below is effectively a prefetch into cache. GCC will | ||||
* load the value into a register but will not use it. Since modern CPUs | * load the value into a register but will not use it. Since modern CPUs | ||||
* reorder operations, this will generally take place in parallel with | * reorder operations, this will generally take place in parallel with | ||||
* other calculations. | * other calculations. | ||||
▲ Show 20 Lines • Show All 224 Lines • ▼ Show 20 Lines | if (mlen == -1) { | ||||
/* The last mbuf has odd # of bytes. Follow the | /* The last mbuf has odd # of bytes. Follow the | ||||
standard (the odd byte is shifted left by 8 bits) */ | standard (the odd byte is shifted left by 8 bits) */ | ||||
su.c[1] = 0; | su.c[1] = 0; | ||||
sum += su.s; | sum += su.s; | ||||
} | } | ||||
REDUCE; | REDUCE; | ||||
return (~sum & 0xffff); | return (~sum & 0xffff); | ||||
} | } | ||||
#endif |