Index: sys/netinet6/frag6.c =================================================================== --- sys/netinet6/frag6.c +++ sys/netinet6/frag6.c @@ -146,13 +146,15 @@ * * There is no explicit reason given in the RFC. Historical reason maybe? */ -/* - * Fragment input - */ -int -frag6_input(struct mbuf **mp, int *offp, int proto) +#define SEND_ICMP6_PARAMPROB(m, off) do { \ + if (quiet == 0) \ + icmp6_error((m), ICMP6_PARAM_PROB, \ + ICMP6_PARAMPROB_HEADER, (off)); \ + else m_freem(m); } while (0) /* quiet: still consume m, as icmp6_error() does */ +struct mbuf* +ip6_reass(struct mbuf *m, int *offp, int *proto, int quiet) { - struct mbuf *m = *mp, *t; + struct mbuf *t; struct ip6_hdr *ip6; struct ip6_frag *ip6f; struct ip6q *q6; @@ -162,24 +164,16 @@ int first_frag = 0; int fragoff, frgpartlen; /* must be larger than u_int16_t */ struct ifnet *dstifp; - u_int8_t ecn, ecn0; -#ifdef RSS - struct m_tag *mtag; - struct ip6_direct_ctx *ip6dc; -#endif + uint8_t ecn, ecn0; -#if 0 - char ip6buf[INET6_ADDRSTRLEN]; -#endif - ip6 = mtod(m, struct ip6_hdr *); #ifndef PULLDOWN_TEST - IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), IPPROTO_DONE); + IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), NULL); ip6f = (struct ip6_frag *)((caddr_t)ip6 + offset); #else IP6_EXTHDR_GET(ip6f, struct ip6_frag *, m, offset, sizeof(*ip6f)); if (ip6f == NULL) - return (IPPROTO_DONE); + return (NULL); #endif dstifp = NULL; @@ -189,11 +183,12 @@ dstifp = ia->ia_ifp; ifa_free(&ia->ia_ifa); } + /* jumbo payload can't contain a fragment header */ if (ip6->ip6_plen == 0) { - icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset); + SEND_ICMP6_PARAMPROB(m, offset); in6_ifstat_inc(dstifp, ifs6_reass_fail); - return IPPROTO_DONE; + return (NULL); } /* @@ -204,10 +199,9 @@ */ if ((ip6f->ip6f_offlg & IP6F_MORE_FRAG) && (((ntohs(ip6->ip6_plen) - offset) & 0x7) != 0)) { - icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, - offsetof(struct ip6_hdr, ip6_plen)); + SEND_ICMP6_PARAMPROB(m, offsetof(struct ip6_hdr, ip6_plen)); in6_ifstat_inc(dstifp, ifs6_reass_fail); - 
return IPPROTO_DONE; + return (NULL); } IP6STAT_INC(ip6s_fragments); @@ -218,14 +212,17 @@ /* * RFC 6946: Handle "atomic" fragments (offset and m bit set to 0) - * upfront, unrelated to any reassembly. Just skip the fragment header. + * upfront, unrelated to any reassembly. Just skip the fragment + * header. */ if ((ip6f->ip6f_offlg & ~IP6F_RESERVED_MASK) == 0) { /* XXX-BZ we want dedicated counters for this. */ IP6STAT_INC(ip6s_reassembled); in6_ifstat_inc(dstifp, ifs6_reass_ok); *offp = offset; - return (ip6f->ip6f_nxt); + *proto = ip6f->ip6f_nxt; + m->m_pkthdr.ip6frag_maxsize = 0; + return (m); } IP6Q_LOCK(); @@ -269,10 +266,9 @@ goto dropfrag; V_frag6_nfragpackets++; q6 = (struct ip6q *)malloc(sizeof(struct ip6q), M_FTABLE, - M_NOWAIT); + M_NOWAIT | M_ZERO); if (q6 == NULL) goto dropfrag; - bzero(q6, sizeof(*q6)); #ifdef MAC if (mac_ip6q_init(q6, M_NOWAIT) != 0) { free(q6, M_FTABLE); @@ -317,19 +313,20 @@ frgpartlen = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - offset; if (q6->ip6q_unfrglen >= 0) { /* The 1st fragment has already arrived. 
*/ - if (q6->ip6q_unfrglen + fragoff + frgpartlen > IPV6_MAXPACKET) { - icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, + if (q6->ip6q_unfrglen + fragoff + + frgpartlen > IPV6_MAXPACKET) { + SEND_ICMP6_PARAMPROB(m, offset - sizeof(struct ip6_frag) + offsetof(struct ip6_frag, ip6f_offlg)); IP6Q_UNLOCK(); - return (IPPROTO_DONE); + return (NULL); } } else if (fragoff + frgpartlen > IPV6_MAXPACKET) { - icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, + SEND_ICMP6_PARAMPROB(m, offset - sizeof(struct ip6_frag) + offsetof(struct ip6_frag, ip6f_offlg)); IP6Q_UNLOCK(); - return (IPPROTO_DONE); + return (NULL); } /* * If it's the first fragment, do the above check for each @@ -340,8 +337,8 @@ af6 = af6dwn) { af6dwn = af6->ip6af_down; - if (q6->ip6q_unfrglen + af6->ip6af_off + af6->ip6af_frglen > - IPV6_MAXPACKET) { + if (q6->ip6q_unfrglen + af6->ip6af_off + + af6->ip6af_frglen > IPV6_MAXPACKET) { struct mbuf *merr = IP6_REASS_MBUF(af6); struct ip6_hdr *ip6err; int erroff = af6->ip6af_offset; @@ -360,8 +357,7 @@ ip6err->ip6_src = q6->ip6q_src; ip6err->ip6_dst = q6->ip6q_dst; - icmp6_error(merr, ICMP6_PARAM_PROB, - ICMP6_PARAMPROB_HEADER, + SEND_ICMP6_PARAMPROB(merr, erroff - sizeof(struct ip6_frag) + offsetof(struct ip6_frag, ip6f_offlg)); } @@ -369,10 +365,9 @@ } ip6af = (struct ip6asfrag *)malloc(sizeof(struct ip6asfrag), M_FTABLE, - M_NOWAIT); + M_NOWAIT | M_ZERO); if (ip6af == NULL) goto dropfrag; - bzero(ip6af, sizeof(*ip6af)); ip6af->ip6af_mff = ip6f->ip6f_offlg & IP6F_MORE_FRAG; ip6af->ip6af_off = fragoff; ip6af->ip6af_frglen = frgpartlen; @@ -506,20 +501,22 @@ frag6_insque(q6, &V_ip6q); } #endif - next = 0; + i = next = 0; for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; af6 = af6->ip6af_down) { if (af6->ip6af_off != next) { IP6Q_UNLOCK(); - return IPPROTO_DONE; + return (NULL); } + /* Remember the maximum fragment size */ + if (af6->ip6af_frglen > i) + i = af6->ip6af_frglen; next += af6->ip6af_frglen; } if (af6->ip6af_up->ip6af_mff) { 
IP6Q_UNLOCK(); - return IPPROTO_DONE; + return (NULL); } - /* * Reassembly is complete; concatenate fragments. */ @@ -551,7 +548,8 @@ offset = ip6af->ip6af_offset - sizeof(struct ip6_frag); free(ip6af, M_FTABLE); ip6 = mtod(m, struct ip6_hdr *); - ip6->ip6_plen = htons((u_short)next + offset - sizeof(struct ip6_hdr)); + ip6->ip6_plen = htons((u_short)next + offset - + sizeof(struct ip6_hdr)); if (q6->ip6q_ecn == IPTOS_ECN_CE) ip6->ip6_flow |= htonl(IPTOS_ECN_CE << 20); nxt = q6->ip6q_nxt; @@ -578,6 +576,13 @@ char *prvnxtp = ip6_get_prevhdr(m, offset); /* XXX */ *prvnxtp = nxt; } + /* + * Save reassembly info into the mbuf's local storage. + */ + m->m_pkthdr.ip6frag_maxsize = i; /* maximum fragment size */ + m->m_pkthdr.ip6frag_offset = q6->ip6q_unfrglen + + sizeof(struct ip6_hdr); + m->m_pkthdr.ip6frag_ident = q6->ip6q_ident; frag6_remque(q6); V_frag6_nfrags -= q6->ip6q_nfrag; @@ -595,46 +600,63 @@ m->m_pkthdr.len = plen; } -#ifdef RSS - mtag = m_tag_alloc(MTAG_ABI_IPV6, IPV6_TAG_DIRECT, sizeof(*ip6dc), - M_NOWAIT); - if (mtag == NULL) - goto dropfrag; - ip6dc = (struct ip6_direct_ctx *)(mtag + 1); - ip6dc->ip6dc_nxt = nxt; - ip6dc->ip6dc_off = offset; - - m_tag_prepend(m, mtag); -#endif - IP6Q_UNLOCK(); IP6STAT_INC(ip6s_reassembled); in6_ifstat_inc(dstifp, ifs6_reass_ok); - -#ifdef RSS - /* - * Queue/dispatch for reprocessing. 
- */ - netisr_dispatch(NETISR_IPV6_DIRECT, m); - return IPPROTO_DONE; -#endif - - /* - * Tell launch routine the next header - */ - - *mp = m; *offp = offset; + *proto = nxt; - return nxt; + return (m); dropfrag: IP6Q_UNLOCK(); in6_ifstat_inc(dstifp, ifs6_reass_fail); IP6STAT_INC(ip6s_fragdropped); m_freem(m); - return IPPROTO_DONE; + return (NULL); +} +#undef SEND_ICMP6_PARAMPROB +/* + * Fragment input + */ +int +frag6_input(struct mbuf **mp, int *offp, int proto) +{ +#ifdef RSS + struct m_tag *mtag; + struct ip6_direct_ctx *ip6dc; +#endif + + *mp = ip6_reass(*mp, offp, &proto, 0); + if (*mp == NULL) + return (IPPROTO_DONE); + +#ifdef RSS + mtag = m_tag_alloc(MTAG_ABI_IPV6, IPV6_TAG_DIRECT, sizeof(*ip6dc), + M_NOWAIT); + if (mtag == NULL) { + IP6STAT_INC(ip6s_fragdropped); + m_freem(*mp); + return (IPPROTO_DONE); + } + + ip6dc = (struct ip6_direct_ctx *)(mtag + 1); + ip6dc->ip6dc_nxt = proto; + ip6dc->ip6dc_off = *offp; + + m_tag_prepend(*mp, mtag); + /* + * Queue/dispatch for reprocessing. 
+ */ + netisr_dispatch(NETISR_IPV6_DIRECT, *mp); + return (IPPROTO_DONE); +#else + /* + * Tell launch routine the next header + */ + return (proto); +#endif } /* Index: sys/netinet6/ip6_var.h =================================================================== --- sys/netinet6/ip6_var.h +++ sys/netinet6/ip6_var.h @@ -396,6 +396,7 @@ int ip6_deletefraghdr(struct mbuf *, int, int); int ip6_fragment(struct ifnet *, struct mbuf *, int, u_char, int, uint32_t); +struct mbuf* ip6_reass(struct mbuf *, int *, int *, int); int route6_input(struct mbuf **, int *, int); Index: sys/netpfil/ipfw/ip_fw2.c =================================================================== --- sys/netpfil/ipfw/ip_fw2.c +++ sys/netpfil/ipfw/ip_fw2.c @@ -234,6 +234,7 @@ #define UDP(p) ((struct udphdr *)(p)) #define ICMP(p) ((struct icmphdr *)(p)) #define ICMP6(p) ((struct icmp6_hdr *)(p)) +#define IP6F(p) ((struct ip6_frag *)(p)) static __inline int icmptype_match(struct icmphdr *icmp, ipfw_insn_u32 *cmd) @@ -493,7 +494,7 @@ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }}}; -static int +int ipfw_localip6(struct in6_addr *in6) { struct rm_priotracker in6_ifa_tracker; @@ -600,6 +601,35 @@ args->m = NULL; } +/* + * Simplified version of ip6_lasthdr(). + * This function assumes that mbuf is contiguous enough to read the last + * header without need to make m_pullup(). Also, it assumes that ipfw_chk() + * already did this work, so it is known that we have valid packet and there + * is no need to do boundary checks again. + * + * Returns protocol number and optionally offset of the last header. 
+ */ +int +ipfw_ip6lasthdr(struct mbuf *m, int *offset) +{ + struct ip6_hdr *ip6; + struct ip6_hbh *hbh; + int proto, hlen; + + ip6 = mtod(m, struct ip6_hdr *); + hlen = sizeof(*ip6); + proto = ip6->ip6_nxt; + while (proto == IPPROTO_HOPOPTS || proto == IPPROTO_ROUTING || + proto == IPPROTO_DSTOPTS) { + hbh = mtodo(m, hlen); + proto = hbh->ip6h_nxt; + hlen += (hbh->ip6h_len + 1) << 3; /* Hdr Ext Len excludes the first 8 octets */ + } + if (offset != NULL) + *offset = hlen; + return (proto); +} #endif /* INET6 */ @@ -831,6 +861,47 @@ } #endif +/* + * Do IP fragments reassembly. + * Return: + * 0 - reassembly is not needed. + * 1 - reassembly is completed + */ +int +ipfw_reass(struct mbuf **mp, int addr_type, int offset) +{ + struct ip *ip; + int hlen; + + switch (addr_type) { + case 4: + ip = mtod(*mp, struct ip *); + if ((ntohs(ip->ip_off) & (IP_MF | IP_OFFMASK)) == 0) + return (0); + *mp = ip_reass(*mp); + if (*mp != NULL) { + /* IP header checksum fixup after reassembly */ + ip = mtod(*mp, struct ip *); + hlen = ip->ip_hl << 2; + ip->ip_sum = 0; + ip->ip_sum = hlen == sizeof(*ip) ? in_cksum_hdr(ip): + in_cksum(*mp, hlen); + } + break; +#ifdef INET6 + case 6: + if (offset == 0 && + ipfw_ip6lasthdr(*mp, &offset) != IPPROTO_FRAGMENT) + return (0); + *mp = ip6_reass(*mp, &offset, &hlen /* unused */, 1); + break; +#endif /* INET6 */ + default: + return (0); + } + return (1); +} + #define TARG(k, f) IP_FW_ARG_TABLEARG(chain, k, f) /* * The main check routine for the firewall. @@ -940,9 +1011,12 @@ * without needed). We will treat a single packet fragment as if * there was no fragment header (or log/block depending on the * V_fw_permit_single_frag6 sysctl setting). + * + * ip6fh_off The offset of an ip6_frag header. */ u_short offset = 0; u_short ip6f_mf = 0; + u_short ip6fh_off = 0; /* * Local copies of addresses. 
They are only valid if we have @@ -1100,12 +1174,12 @@ case IPPROTO_FRAGMENT: /* RFC 2460 */ PULLUP_TO(hlen, ulp, struct ip6_frag); ext_hd |= EXT_FRAGMENT; + ip6fh_off = hlen; hlen += sizeof (struct ip6_frag); - proto = ((struct ip6_frag *)ulp)->ip6f_nxt; - offset = ((struct ip6_frag *)ulp)->ip6f_offlg & - IP6F_OFF_MASK; - ip6f_mf = ((struct ip6_frag *)ulp)->ip6f_offlg & - IP6F_MORE_FRAG; + proto = IP6F(ulp)->ip6f_nxt; + offset = IP6F(ulp)->ip6f_offlg & IP6F_OFF_MASK; + ip6f_mf = IP6F(ulp)->ip6f_offlg & + IP6F_MORE_FRAG; if (V_fw_permit_single_frag6 == 0 && offset == 0 && ip6f_mf == 0) { if (V_fw_verbose) @@ -1116,7 +1190,7 @@ break; } args->f_id.extra = - ntohl(((struct ip6_frag *)ulp)->ip6f_ident); + ntohl(IP6F(ulp)->ip6f_ident); ulp = NULL; break; @@ -1214,6 +1288,10 @@ iplen = ntohs(ip->ip_len); pktlen = iplen < pktlen ? iplen : pktlen; + /* + * XXX: what to do with first fragment, that has + * not enough length, but it is still valid? + */ if (offset == 0) { switch (proto) { case IPPROTO_TCP: @@ -2572,40 +2650,21 @@ retval = ipfw_nat_ptr(args, t, m); break; - case O_REASS: { - int ip_off; - + case O_REASS: IPFW_INC_RULE_COUNTER(f, pktlen); l = 0; /* in any case exit inner loop */ - ip_off = ntohs(ip->ip_off); - - /* if not fragmented, go to next rule */ - if ((ip_off & (IP_MF | IP_OFFMASK)) == 0) - break; - - args->m = m = ip_reass(m); - - /* - * do IP header checksum fixup. 
- */ - if (m == NULL) { /* fragment got swallowed */ - retval = IP_FW_DENY; - } else { /* good, packet complete */ - int hlen; - - ip = mtod(m, struct ip *); - hlen = ip->ip_hl << 2; - ip->ip_sum = 0; - if (hlen == sizeof(struct ip)) - ip->ip_sum = in_cksum_hdr(ip); - else - ip->ip_sum = in_cksum(m, hlen); - retval = IP_FW_REASS; - set_match(args, f_pos, chain); + if (ipfw_reass(&args->m, + args->f_id.addr_type, ip6fh_off) == 0) + break; + if (args->m == NULL) + retval = IP_FW_DENY; + else { + retval = IP_FW_REASS; + set_match(args, f_pos, chain); } done = 1; /* exit outer loop */ break; - } + case O_EXTERNAL_ACTION: l = 0; /* in any case exit inner loop */ retval = ipfw_run_eaction(chain, args, Index: sys/netpfil/ipfw/ip_fw_pfil.c =================================================================== --- sys/netpfil/ipfw/ip_fw_pfil.c +++ sys/netpfil/ipfw/ip_fw_pfil.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -82,7 +83,7 @@ int ipfw_chg_hook(SYSCTL_HANDLER_ARGS); /* Forward declarations. */ -static int ipfw_divert(struct mbuf **, int, struct ipfw_rule_ref *, int); +static int ipfw_divert(struct mbuf **, int, struct ip_fw_args *, int); int ipfw_check_packet(void *, struct mbuf **, struct ifnet *, int, struct inpcb *); int ipfw_check_frame(void *, struct mbuf **, struct ifnet *, int, @@ -113,7 +114,177 @@ #endif /* SYSCTL_NODE */ +static int +ipfw_setnexthop(struct ip_fw_args *args, int dir) +{ + struct m_tag *fwd_tag; + struct sockaddr *sa; + +#if !defined(INET6) && !defined(INET) + return (EACCES); +#else + KASSERT(args->next_hop == NULL || args->next_hop6 == NULL, + ("%s: both next_hop=%p and next_hop6=%p not NULL", __func__, + args->next_hop, args->next_hop6)); + + /* Reuse the tag if present. 
*/ + if (args->m->m_flags & (M_IP_NEXTHOP | M_IP6_NEXTHOP)) + fwd_tag = m_tag_find(args->m, PACKET_TAG_IPFORWARD, NULL); + else + fwd_tag = NULL; +#ifdef INET6 + if (args->next_hop6 != NULL) { + /* + * If nh6 address is link-local we should convert it + * to kernel internal form before doing any comparisons. + */ + if (sa6_embedscope(args->next_hop6, V_ip6_use_defzone) != 0) + return (IP_FW_DENY); + if (in6_localip(&args->next_hop6->sin6_addr) != 0) + args->m->m_flags |= M_FASTFWD_OURS; + else + args->m->m_flags &= ~M_FASTFWD_OURS; + /* + * If fwd_tag is already exist, check that it has enough + * room to keep IPv6 address. + */ + sa = (struct sockaddr *)args->next_hop6; + if (fwd_tag != NULL && + sa->sa_len != ((struct sockaddr *)(fwd_tag + 1))->sa_len) { + m_tag_delete(args->m, fwd_tag); + fwd_tag = NULL; + } + args->m->m_flags |= M_IP6_NEXTHOP; + } +#endif +#ifdef INET + if (args->next_hop != NULL) { + if (in_localip(args->next_hop->sin_addr)) + args->m->m_flags |= M_FASTFWD_OURS; + else + args->m->m_flags &= ~M_FASTFWD_OURS; + sa = (struct sockaddr *)args->next_hop; + args->m->m_flags |= M_IP_NEXTHOP; + } +#endif + if (fwd_tag == NULL) { + fwd_tag = m_tag_get(PACKET_TAG_IPFORWARD, sa->sa_len, + M_NOWAIT); + if (fwd_tag == NULL) + return (EACCES); + m_tag_prepend(args->m, fwd_tag); + } + /* Copy address into the tag. */ + bcopy(sa, fwd_tag + 1, sa->sa_len); + return (0); +#endif /* INET || INET6 */ +} +#ifdef INET6 /* + * Make IPv6 fragmentation for reassembled packet. + */ +static int +ipfw_ip6_fragment(struct ip_fw_args *args, int dir, int action) +{ + struct ifnet *rcvif; + struct mbuf *m, *m0; + int error; + + /* + * Refragmentation for "atomic" fragments is not needed, + * keep fragment as is. + */ + if (args->m->m_pkthdr.ip6frag_maxsize == 0) + return (action); + if (args->m->m_pkthdr.rcvif == NULL) + return (IP_FW_DENY); + + switch (action) { + case IP_FW_DENY: + case IP_FW_DIVERT: + /* No need to make fragmentation. 
*/ + return (action); + case IP_FW_PASS: + /* + * Check next_hop6. If we should forward packet to our + * local address, no need to make fragmentation. + */ + if (args->next_hop6 != NULL) { + if (ipfw_setnexthop(args, dir) != 0) + return (IP_FW_DENY); + if (args->m->m_flags & M_FASTFWD_OURS) { + /* Avoid to set nexthop again. */ + args->next_hop6 = NULL; + return (action); + } + /* + * next_hop6 is a foreign address. + * ip6_fragment() uses mcopym() to copy unfragmentable + * header portions. mcopym() does copy the tags chain + * and mbuf flags, thus fwd_tag will be copyied to + * all fragments. + */ + } else if (ipfw_localip6(&args->f_id.dst_ip6) != 0) { + /* + * If destination address is our own, no need to make + * fragmentation. + */ + if (!IN6_IS_ADDR_LINKLOCAL(&args->f_id.dst_ip6)) { + /* + * IPv6 LLA needs scope verification, that + * is skipped when M_FASTFWD_OURS is set. + */ + args->m->m_flags |= M_FASTFWD_OURS; + } + return (action); + } + /* FALLTHROUGH */ + case IP_FW_TEE: + case IP_FW_DUMMYNET: + case IP_FW_NETGRAPH: + case IP_FW_NGTEE: + break; + default: + return (IP_FW_DENY); + } + + /* Fragment size must be a multiple of 8. */ + m0 = args->m; + m0->m_pkthdr.ip6frag_maxsize &= ~7; + /* Save rcvif, ip6_fragment() will reset it to NULL. */ + rcvif = m0->m_pkthdr.rcvif; + /* Set next header to IPPROTO_FRAGMENT */ + *(uint8_t *)ip6_get_prevhdr(m0, + m0->m_pkthdr.ip6frag_offset) = IPPROTO_FRAGMENT; + error = ip6_fragment(args->oif, m0, + m0->m_pkthdr.ip6frag_offset, + args->f_id.proto, m0->m_pkthdr.ip6frag_maxsize, + m0->m_pkthdr.ip6frag_ident); + + m = m0->m_nextpkt; + m0->m_nextpkt = NULL; + m_freem(m0); + args->m = NULL; + for (m0 = m; m != NULL; m = m0) { + m0 = m->m_nextpkt; + m->m_nextpkt = NULL; + if (error == 0) { + /* Put fragments into netisr queue. 
*/ + m->m_flags |= M_SKIP_FIREWALL; + m->m_pkthdr.rcvif = rcvif; + netisr_queue(NETISR_IPV6, m); + } else + m_freem(m); + } + /* + * All fragments are queued for new processing with M_SKIP_FIREWALL + * flag or freed due to error in ip6_fragment(). This mbuf is + * considered as consumed, so return IP_FW_DENY. + */ + return (IP_FW_DENY); +} +#endif +/* * The pfilter hook to pass packets to ipfw_chk and then to * dummynet, divert, netgraph or other modules. * The packet may be consumed. @@ -124,8 +295,10 @@ { struct ip_fw_args args; struct m_tag *tag; - int ipfw; - int ret; + int ipfw, ret; +#ifdef INET6 + int refrag = 0; +#endif /* convert dir to IPFW values */ dir = (dir == PFIL_IN) ? DIR_IN : DIR_OUT; @@ -149,6 +322,10 @@ args.inp = inp; ipfw = ipfw_chk(&args); +#ifdef INET6 + if (refrag != 0 && args.m != NULL) + ipfw = ipfw_ip6_fragment(&args, dir, ipfw); +#endif *m0 = args.m; KASSERT(*m0 != NULL || ipfw == IP_FW_DENY, ("%s: m0 is NULL", @@ -161,72 +338,7 @@ /* next_hop may be set by ipfw_chk */ if (args.next_hop == NULL && args.next_hop6 == NULL) break; /* pass */ -#if (!defined(INET6) && !defined(INET)) - ret = EACCES; -#else - { - struct m_tag *fwd_tag; - size_t len; - - KASSERT(args.next_hop == NULL || args.next_hop6 == NULL, - ("%s: both next_hop=%p and next_hop6=%p not NULL", __func__, - args.next_hop, args.next_hop6)); -#ifdef INET6 - if (args.next_hop6 != NULL) - len = sizeof(struct sockaddr_in6); -#endif -#ifdef INET - if (args.next_hop != NULL) - len = sizeof(struct sockaddr_in); -#endif - - /* Incoming packets should not be tagged so we do not - * m_tag_find. Outgoing packets may be tagged, so we - * reuse the tag if present. - */ - fwd_tag = (dir == DIR_IN) ? NULL : - m_tag_find(*m0, PACKET_TAG_IPFORWARD, NULL); - if (fwd_tag != NULL) { - m_tag_unlink(*m0, fwd_tag); - } else { - fwd_tag = m_tag_get(PACKET_TAG_IPFORWARD, len, - M_NOWAIT); - if (fwd_tag == NULL) { - ret = EACCES; - break; /* i.e. 
drop */ - } - } -#ifdef INET6 - if (args.next_hop6 != NULL) { - struct sockaddr_in6 *sa6; - - sa6 = (struct sockaddr_in6 *)(fwd_tag + 1); - bcopy(args.next_hop6, sa6, len); - /* - * If nh6 address is link-local we should convert - * it to kernel internal form before doing any - * comparisons. - */ - if (sa6_embedscope(sa6, V_ip6_use_defzone) != 0) { - ret = EACCES; - break; - } - if (in6_localip(&sa6->sin6_addr)) - (*m0)->m_flags |= M_FASTFWD_OURS; - (*m0)->m_flags |= M_IP6_NEXTHOP; - } -#endif -#ifdef INET - if (args.next_hop != NULL) { - bcopy(args.next_hop, (fwd_tag+1), len); - if (in_localip(args.next_hop->sin_addr)) - (*m0)->m_flags |= M_FASTFWD_OURS; - (*m0)->m_flags |= M_IP_NEXTHOP; - } -#endif - m_tag_prepend(*m0, fwd_tag); - } -#endif /* INET || INET6 */ + ret = ipfw_setnexthop(&args, dir); break; case IP_FW_DENY: @@ -237,9 +349,9 @@ ret = EACCES; if (ip_dn_io_ptr == NULL) break; /* i.e. drop */ - if (mtod(*m0, struct ip *)->ip_v == 4) + if (args.f_id.addr_type == 4) ret = ip_dn_io_ptr(m0, dir, &args); - else if (mtod(*m0, struct ip *)->ip_v == 6) + else if (args.f_id.addr_type == 6) ret = ip_dn_io_ptr(m0, dir | PROTO_IPV6, &args); else break; /* drop it */ @@ -260,7 +372,7 @@ ret = EACCES; break; /* i.e. drop */ } - ret = ipfw_divert(m0, dir, &args.rule, + ret = ipfw_divert(m0, dir, &args, (ipfw == IP_FW_TEE) ? 1 : 0); /* continue processing for the original packet (tee). */ if (*m0) @@ -286,8 +398,18 @@ goto again; case IP_FW_REASS: +#ifdef INET6 + /* + * IPv6 router does not perform IP fragmentation for foreign + * packets. Thus we need to make fragmentation on their own, + * otherwise ip6_output/ip6_forward will drop an reassembled + * packet due to the MTU exceeding. 
+ */ + if (args.f_id.addr_type == 6) + refrag = 1; +#endif goto again; /* continue with packet */ - + default: KASSERT(0, ("%s: unknown retval", __func__)); } @@ -298,7 +420,7 @@ *m0 = NULL; } - return ret; + return (ret); } /* @@ -400,94 +522,52 @@ /* do the divert, return 1 on error 0 on success */ static int -ipfw_divert(struct mbuf **m0, int incoming, struct ipfw_rule_ref *rule, - int tee) +ipfw_divert(struct mbuf **m0, int incoming, struct ip_fw_args *args, int tee) { + struct m_tag *tag; + struct mbuf *clone; + /* * ipfw_chk() has already tagged the packet with the divert tag. * If tee is set, copy packet and return original. * If not tee, consume packet and send it to divert socket. + * + * Divert listeners can normally handle non-fragmented packets, + * but we can only reass in the non-tee case. + * This means that listeners on a tee rule may get fragments, + * and have to live with that. + * Note that we now have the 'reass' ipfw option so if we care + * we can do it before a 'tee'. */ - struct mbuf *clone; - struct ip *ip = mtod(*m0, struct ip *); - struct m_tag *tag; - - /* Cloning needed for tee? */ if (tee == 0) { + if (ipfw_reass(m0, args->f_id.addr_type, 0) != 0 && + *m0 == NULL) + return (0); /* consumed by reassembly code */ clone = *m0; /* use the original mbuf */ *m0 = NULL; } else { clone = m_dup(*m0, M_NOWAIT); - /* If we cannot duplicate the mbuf, we sacrifice the divert + /* + * If we cannot duplicate the mbuf, we sacrifice the divert * chain and continue with the tee-ed packet. */ if (clone == NULL) - return 1; + return (1); } - /* - * Divert listeners can normally handle non-fragmented packets, - * but we can only reass in the non-tee case. - * This means that listeners on a tee rule may get fragments, - * and have to live with that. - * Note that we now have the 'reass' ipfw option so if we care - * we can do it before a 'tee'. 
- */ - if (!tee) switch (ip->ip_v) { - case IPVERSION: - if (ntohs(ip->ip_off) & (IP_MF | IP_OFFMASK)) { - int hlen; - struct mbuf *reass; - - reass = ip_reass(clone); /* Reassemble packet. */ - if (reass == NULL) - return 0; /* not an error */ - /* if reass = NULL then it was consumed by ip_reass */ - /* - * IP header checksum fixup after reassembly and leave header - * in network byte order. - */ - ip = mtod(reass, struct ip *); - hlen = ip->ip_hl << 2; - ip->ip_sum = 0; - if (hlen == sizeof(struct ip)) - ip->ip_sum = in_cksum_hdr(ip); - else - ip->ip_sum = in_cksum(reass, hlen); - clone = reass; - } - break; -#ifdef INET6 - case IPV6_VERSION >> 4: - { - struct ip6_hdr *const ip6 = mtod(clone, struct ip6_hdr *); - - if (ip6->ip6_nxt == IPPROTO_FRAGMENT) { - int nxt, off; - - off = sizeof(struct ip6_hdr); - nxt = frag6_input(&clone, &off, 0); - if (nxt == IPPROTO_DONE) - return (0); - } - break; - } -#endif - } - /* attach a tag to the packet with the reinject info */ tag = m_tag_alloc(MTAG_IPFW_RULE, 0, - sizeof(struct ipfw_rule_ref), M_NOWAIT); + sizeof(struct ipfw_rule_ref), M_NOWAIT); if (tag == NULL) { FREE_PKT(clone); - return 1; + return (1); } - *((struct ipfw_rule_ref *)(tag+1)) = *rule; + *((struct ipfw_rule_ref *)(tag + 1)) = args->rule; m_tag_prepend(clone, tag); /* Do the dirty job... 
*/ ip_divert_ptr(clone, incoming); - return 0; + return (0); } /* Index: sys/netpfil/ipfw/ip_fw_private.h =================================================================== --- sys/netpfil/ipfw/ip_fw_private.h +++ sys/netpfil/ipfw/ip_fw_private.h @@ -619,12 +619,15 @@ void ipfw_iface_add_notify(struct ip_fw_chain *ch, struct ipfw_ifc *ic); void ipfw_iface_del_notify(struct ip_fw_chain *ch, struct ipfw_ifc *ic); -/* In ip_fw_sockopt.c */ +/* In ip_fw_sockopt.c/ip_fw2.c */ void ipfw_init_skipto_cache(struct ip_fw_chain *chain); void ipfw_destroy_skipto_cache(struct ip_fw_chain *chain); int ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id); int ipfw_ctl3(struct sockopt *sopt); int ipfw_chk(struct ip_fw_args *args); +int ipfw_reass(struct mbuf **, int, int); +int ipfw_ip6lasthdr(struct mbuf *, int *); +int ipfw_localip6(struct in6_addr *); void ipfw_reap_add(struct ip_fw_chain *chain, struct ip_fw **head, struct ip_fw *rule); void ipfw_reap_rules(struct ip_fw *head); Index: sys/netpfil/ipfw/pmod/tcpmod.c =================================================================== --- sys/netpfil/ipfw/pmod/tcpmod.c +++ sys/netpfil/ipfw/pmod/tcpmod.c @@ -119,26 +119,12 @@ static int tcpmod_ipv6_setmss(struct mbuf **mp, uint16_t mss) { - struct ip6_hdr *ip6; - struct ip6_hbh *hbh; struct tcphdr *tcp; int hlen, plen, proto; - ip6 = mtod(*mp, struct ip6_hdr *); - hlen = sizeof(*ip6); - proto = ip6->ip6_nxt; - /* - * Skip IPv6 extension headers and get the TCP header. - * ipfw_chk() has already done this work. So we are sure that - * we will not do an access to the out of bounds. For this - * reason we skip some checks here. 
- */ - while (proto == IPPROTO_HOPOPTS || proto == IPPROTO_ROUTING || - proto == IPPROTO_DSTOPTS) { - hbh = mtodo(*mp, hlen); - proto = hbh->ip6h_nxt; - hlen += hbh->ip6h_len << 3; - } + proto = ipfw_ip6lasthdr(*mp, &hlen); + MPASS(proto == IPPROTO_TCP); + tcp = mtodo(*mp, hlen); plen = (*mp)->m_pkthdr.len - hlen; hlen = tcp->th_off << 2; Index: sys/sys/mbuf.h =================================================================== --- sys/sys/mbuf.h +++ sys/sys/mbuf.h @@ -188,6 +188,9 @@ #define lro_nsegs tso_segsz #define csum_phsum PH_per.sixteen[2] #define csum_data PH_per.thirtytwo[1] +#define ip6frag_maxsize PH_loc.sixteen[0] +#define ip6frag_offset PH_loc.sixteen[1] +#define ip6frag_ident PH_loc.thirtytwo[1] /* * Description of external storage mapped into mbuf; valid only if M_EXT is