/*
 * Review notes for this patch.  This comment sits ahead of the first
 * "Index:" header, outside every hunk.
 *
 * sys/net/pfvar.h
 *   Adds PF_ALGNMNT(off), which is true when off is even, and prototypes
 *   for pf_patch_16_unaligned() and pf_patch_32_unaligned().
 *
 * sys/netpfil/pf/pf.c
 *   pf_cksum_fixup() is restructured; for every input it returns the same
 *   value as the lines it replaces.
 *   pf_patch_8() stores v into *f.  Unless the byte is unchanged or the
 *   mbuf has CSUM_DELAY_DATA or CSUM_DELAY_DATA_IPV6 set, it then updates
 *   *cksum through pf_cksum_fixup().  The old and new byte are shifted
 *   left by 8 before htons() when hi is true, and passed unshifted when
 *   hi is false.
 *   pf_patch_16_unaligned() and pf_patch_32_unaligned() call pf_patch_8()
 *   on each byte of the field in memory order, flipping hi on every byte.
 *   The SACK rewrite now calls pf_patch_32_unaligned() instead of
 *   pf_change_proto_a().
 *
 * sys/netpfil/pf/pf_norm.c
 *   The timestamp rewrites call pf_patch_32_unaligned() and the MSS clamp
 *   calls pf_patch_16_unaligned(), replacing pf_change_proto_a() and
 *   pf_proto_cksum_fixup().
 *
 * Every call site passes PF_ALGNMNT(startoff), where startoff is the byte
 * distance from the start of the options area (opts, or
 * hdr + sizeof(struct tcphdr)) to the option being examined.
 *
 * NOTE(review): presumably the goal is to keep th_sum correct when a TCP
 * option starts on an odd byte; confirm against the commit message.
 * NOTE(review): line breaks and indentation below were rebuilt from a
 * whitespace-collapsed copy using the hunk line counts.  If context fails
 * to match, apply with "patch -l" or regenerate the diff.
 */
Index: sys/net/pfvar.h
===================================================================
--- sys/net/pfvar.h
+++ sys/net/pfvar.h
@@ -330,6 +330,8 @@
 	(neg)								\
 )
 
+#define PF_ALGNMNT(off) (((off) % 2) == 0)	/* true for even offsets */
+
 struct pf_rule_uid {
 	uid_t		 uid[2];
 	u_int8_t	 op;
@@ -1735,6 +1737,10 @@
 void	pf_change_proto_a(struct mbuf *, void *, u_int16_t *, u_int32_t,
 	    u_int8_t);
 void	pf_change_tcp_a(struct mbuf *, void *, u_int16_t *, u_int32_t);
+void	pf_patch_16_unaligned(struct mbuf *, u_int16_t *, void *, u_int16_t,
+	    bool, u_int8_t);
+void	pf_patch_32_unaligned(struct mbuf *, u_int16_t *, void *, u_int32_t,
+	    bool, u_int8_t);
 void	pf_send_deferred_syn(struct pf_state *);
 int	pf_match_addr(u_int8_t, struct pf_addr *, struct pf_addr *,
 	    struct pf_addr *, sa_family_t);
Index: sys/netpfil/pf/pf.c
===================================================================
--- sys/netpfil/pf/pf.c
+++ sys/netpfil/pf/pf.c
@@ -295,6 +295,8 @@
 			    struct pf_state_key *, struct pf_state_key *);
 static int		 pf_addr_wrap_neq(struct pf_addr_wrap *,
 			    struct pf_addr_wrap *);
+static void		 pf_patch_8(struct mbuf *, u_int16_t *, u_int8_t *, u_int8_t,
+			    bool, u_int8_t);
 static struct pf_state	*pf_find_state(struct pfi_kif *,
 			    struct pf_state_key_cmp *, u_int);
 static int		 pf_src_connlimit(struct pf_state **);
@@ -2091,16 +2093,60 @@
 u_int16_t
 pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
 {
-	u_int32_t	l;
-
-	if (udp && !cksum)
-		return (0x0000);
-	l = cksum + old - new;
-	l = (l >> 16) + (l & 65535);
-	l = l & 65535;
-	if (udp && !l)
-		return (0xFFFF);
-	return (l);
+	u_int32_t x;
+
+	x = cksum + old - new;
+	x = (x + (x >> 16)) & 0xffff;	/* add the upper 16 bits back in */
+
+	/* optimise: eliminate a branch when not udp */
+	if (udp && cksum == 0x0000)
+		return cksum;	/* a zero UDP checksum is returned as is */
+	if (udp && x == 0x0000)
+		x = 0xffff;	/* never hand back zero for UDP */
+
+	return (u_int16_t)(x);
+}
+
+static void
+pf_patch_8(struct mbuf *m, u_int16_t *cksum, u_int8_t *f, u_int8_t v, bool hi,
+	u_int8_t udp)
+{
+	u_int16_t old = htons(hi ? (*f << 8) : *f);
+	u_int16_t new = htons(hi ? ( v << 8) :  v);
+
+	if (*f == v)
+		return;	/* byte unchanged, nothing to fix up */
+
+	*f = v;
+
+	if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6))
+		return;	/* no fixup when either delayed-csum flag is set */
+
+	*cksum = pf_cksum_fixup(*cksum, old, new, udp);
+}
+
+void
+pf_patch_16_unaligned(struct mbuf *m, u_int16_t *cksum, void *f, u_int16_t v,
+	bool hi, u_int8_t udp)
+{
+	u_int8_t *fb = (u_int8_t *)f;
+	u_int8_t *vb = (u_int8_t *)&v;	/* bytes of v in memory order */
+
+	pf_patch_8(m, cksum, fb++, *vb++, hi, udp);
+	pf_patch_8(m, cksum, fb++, *vb++, !hi, udp);	/* opposite half */
+}
+
+void
+pf_patch_32_unaligned(struct mbuf *m, u_int16_t *cksum, void *f, u_int32_t v,
+	bool hi, u_int8_t udp)
+{
+	u_int8_t *fb = (u_int8_t *)f;
+	u_int8_t *vb = (u_int8_t *)&v;	/* bytes of v in memory order */
+
+	pf_patch_8(m, cksum, fb++, *vb++, hi, udp);
+	pf_patch_8(m, cksum, fb++, *vb++, !hi, udp);
+	pf_patch_8(m, cksum, fb++, *vb++, hi, udp);
+	pf_patch_8(m, cksum, fb++, *vb++, !hi, udp);
 }
 
 u_int16_t
@@ -2326,6 +2372,7 @@
 		return 0;
 
 	while (hlen >= TCPOLEN_SACKLEN) {
+		size_t startoff = opt - opts;	/* offset of opt within opts */
 		olen = opt[1];
 		switch (*opt) {
 		case TCPOPT_EOL:	/* FALLTHROUGH */
@@ -2340,10 +2387,16 @@
 				for (i = 2; i + TCPOLEN_SACK <= olen;
 				    i += TCPOLEN_SACK) {
 					memcpy(&sack, &opt[i], sizeof(sack));
-					pf_change_proto_a(m, &sack.start, &th->th_sum,
-					    htonl(ntohl(sack.start) - dst->seqdiff), 0);
-					pf_change_proto_a(m, &sack.end, &th->th_sum,
-					    htonl(ntohl(sack.end) - dst->seqdiff), 0);
+					pf_patch_32_unaligned(m,
+					    &th->th_sum, &sack.start,
+					    htonl(ntohl(sack.start) - dst->seqdiff),
+					    PF_ALGNMNT(startoff),
+					    0);
+					pf_patch_32_unaligned(m, &th->th_sum,
+					    &sack.end,
+					    htonl(ntohl(sack.end) - dst->seqdiff),
+					    PF_ALGNMNT(startoff),
+					    0);
 					memcpy(&opt[i], &sack, sizeof(sack));
 				}
 				copyback = 1;
Index: sys/netpfil/pf/pf_norm.c
===================================================================
--- sys/netpfil/pf/pf_norm.c
+++ sys/netpfil/pf/pf_norm.c
@@ -1517,6 +1517,7 @@
 	u_int8_t *opt;
 	int copyback = 0;
 	int got_ts = 0;
+	size_t startoff;
 
 	KASSERT((src->scrub || dst->scrub),
 	    ("%s: src->scrub && dst->scrub!", __func__));
@@ -1560,6 +1561,7 @@
 			opt = hdr + sizeof(struct tcphdr);
 			hlen = (th->th_off << 2) - sizeof(struct tcphdr);
 			while (hlen >= TCPOLEN_TIMESTAMP) {
+				startoff = opt - (hdr + sizeof(struct tcphdr));	/* offset past the TCP header */
 				switch (*opt) {
 				case TCPOPT_EOL:	/* FALLTHROUGH */
 				case TCPOPT_NOP:
@@ -1589,10 +1591,12 @@
 					    (src->scrub->pfss_flags &
 					    PFSS_TIMESTAMP)) {
 						tsval = ntohl(tsval);
-						pf_change_proto_a(m, &opt[2],
+						pf_patch_32_unaligned(m,
 						    &th->th_sum,
+						    &opt[2],
 						    htonl(tsval +
 						    src->scrub->pfss_ts_mod),
+						    PF_ALGNMNT(startoff),
 						    0);
 						copyback = 1;
 					}
@@ -1605,8 +1609,11 @@
 					    PFSS_TIMESTAMP)) {
 						tsecr = ntohl(tsecr)
 						    - dst->scrub->pfss_ts_mod;
-						pf_change_proto_a(m, &opt[6],
-						    &th->th_sum, htonl(tsecr),
+						pf_patch_32_unaligned(m,
+						    &th->th_sum,
+						    &opt[6],
+						    htonl(tsecr),
+						    PF_ALGNMNT(startoff),
 						    0);
 						copyback = 1;
 					}
@@ -1903,6 +1910,7 @@
 	int		 rewrite = 0;
 	u_char		 opts[TCP_MAXOLEN];
 	u_char		*optp = opts;
+	size_t		 startoff;
 
 	thoff = th->th_off << 2;
 	cnt = thoff - sizeof(struct tcphdr);
@@ -1912,6 +1920,7 @@
 		return (rewrite);
 
 	for (; cnt > 0; cnt -= optlen, optp += optlen) {
+		startoff = optp - opts;	/* offset of optp within opts */
 		opt = optp[0];
 		if (opt == TCPOPT_EOL)
 			break;
@@ -1928,9 +1937,11 @@
 		case TCPOPT_MAXSEG:
 			mss = (u_int16_t *)(optp + 2);
 			if ((ntohs(*mss)) > r->max_mss) {
-				th->th_sum = pf_proto_cksum_fixup(m,
-				    th->th_sum, *mss, htons(r->max_mss), 0);
-				*mss = htons(r->max_mss);
+				pf_patch_16_unaligned(m,
+				    &th->th_sum,
+				    mss, htons(r->max_mss),
+				    PF_ALGNMNT(startoff),
+				    0);
 				rewrite = 1;
 			}
 			break;