diff --git a/sys/net/pfil.c b/sys/net/pfil.c --- a/sys/net/pfil.c +++ b/sys/net/pfil.c @@ -115,6 +115,10 @@ static struct pfil_link *pfil_link_remove(pfil_chain_t *, pfil_hook_t ); static void pfil_link_free(epoch_context_t); +/* pf(4) functions accessible for dummynet*/ +void (*pf_forward4_p)(struct mbuf **, u_int8_t); +void (*pf_forward6_p)(struct mbuf **, u_int8_t); + /* * To couple a filtering point that provides memory pointer with a filter that * works on mbufs only. diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -2326,6 +2326,7 @@ struct pf_mtag *pf_get_mtag(struct mbuf *); +void pf_skip_hook(struct mbuf *, sa_family_t, u_int8_t); extern void pf_calc_skip_steps(struct pf_krulequeue *); #ifdef ALTQ extern void pf_altq_ifnet_event(struct ifnet *, int); @@ -2461,8 +2462,13 @@ #ifdef INET6 int pf_normalize_ip6(int, u_short *, struct pf_pdesc *); int pf_max_frag_size(struct mbuf *); +typedef enum { + PF_REFRAGMENT6_IP6_OUTPUT = 0, + PF_REFRAGMENT6_IP6_FORWARD, + PF_REFRAGMENT6_PF_FORWARD, +} pf_refragment6_forward_t; int pf_refragment6(struct ifnet *, struct mbuf **, struct m_tag *, - struct ifnet *, bool); + struct ifnet *, pf_refragment6_forward_t); #endif /* INET6 */ int pf_multihome_scan_init(int, int, struct pf_pdesc *); @@ -2506,7 +2512,7 @@ struct pf_state_key *pf_alloc_state_key(int); int pf_translate(struct pf_pdesc *, struct pf_addr *, u_int16_t, struct pf_addr *, u_int16_t, u_int16_t, int); -int pf_translate_af(struct pf_pdesc *); +int pf_translate_af(struct pf_pdesc *, struct pf_kstate *); bool pf_init_threshold(struct pf_kthreshold *, uint32_t, uint32_t); void pfr_initialize(void); @@ -2594,7 +2600,7 @@ const struct pf_addr *, const struct pf_addr *, u_int16_t, u_int16_t, u_int32_t, u_int32_t, u_int8_t, u_int16_t, u_int16_t, u_int8_t, int, - u_int16_t, u_int16_t, int); + u_int16_t, u_int16_t, int, struct pf_kstate *); void pf_syncookies_init(void); void pf_syncookies_cleanup(void); @@ -2749,6 +2755,14 @@ #if defined(INET) || defined(INET6) void pf_scrub(struct pf_pdesc *); #endif +#ifdef INET +extern void (*pf_forward4_p)(struct mbuf **, u_int8_t); +void pf_forward4(struct mbuf **, u_int8_t); +#endif /* INET */ +#ifdef INET6 +extern void (*pf_forward6_p)(struct mbuf **, u_int8_t); +void pf_forward6(struct mbuf **, u_int8_t); +#endif /* INET6 */ struct pfi_kkif *pf_kkif_create(int); void pf_kkif_free(struct pfi_kkif *); diff --git a/sys/netpfil/ipfw/ip_dn_io.c b/sys/netpfil/ipfw/ip_dn_io.c --- a/sys/netpfil/ipfw/ip_dn_io.c +++ b/sys/netpfil/ipfw/ip_dn_io.c @@ -30,6 +30,7 @@ * Dummynet portions related to packet handling. */ #include +#include "opt_inet.h" #include "opt_inet6.h" #include @@ -51,6 +52,7 @@ #include /* NET_EPOCH_... */ #include #include +#include /* pf_forward[46]_p */ #include #include @@ -69,6 +71,7 @@ #include #endif #include +#include /* * We keep a private variable for the simulation time, but we could @@ -88,6 +91,10 @@ extern void (*bridge_dn_p)(struct mbuf *, struct ifnet *); +/* pf functions accessible for dummynet */ +extern void (*pf_forward4_p)(struct mbuf **, u_int8_t); +extern void (*pf_forward6_p)(struct mbuf **, u_int8_t); + #ifdef SYSCTL_NODE /* @@ -772,6 +779,7 @@ dummynet_send(struct mbuf *m) { struct mbuf *n; + struct pf_mtag *pf_mtag; NET_EPOCH_ASSERT(); @@ -802,22 +810,44 @@ } } + pf_mtag = pf_find_mtag(m); switch (dst) { case DIR_OUT: - ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL); + if (pf_mtag && + pf_mtag->flags & PF_MTAG_FLAG_ROUTE_TO && + pf_forward4_p != NULL) { + pf_forward4_p(&m, PF_OUT); + } else { + ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL); + } break ; case DIR_IN : - netisr_dispatch(NETISR_IP, m); + if (pf_mtag && + pf_mtag->flags & PF_MTAG_FLAG_ROUTE_TO && + pf_forward4_p != NULL) + pf_forward4_p(&m, PF_IN); + else + netisr_dispatch(NETISR_IP, m); break; #ifdef INET6 case DIR_IN | PROTO_IPV6: - netisr_dispatch(NETISR_IPV6, m); + if (pf_mtag && + pf_mtag->flags & PF_MTAG_FLAG_ROUTE_TO && + pf_forward6_p != NULL) + pf_forward6_p(&m, PF_IN); + else + netisr_dispatch(NETISR_IPV6, m); break; case DIR_OUT | PROTO_IPV6: - ip6_output(m, NULL, NULL, IPV6_FORWARDING, NULL, NULL, NULL); + if (pf_mtag && + pf_mtag->flags & PF_MTAG_FLAG_ROUTE_TO && + pf_forward6_p != NULL) + pf_forward6_p(&m, PF_OUT); + else + ip6_output(m, NULL, NULL, IPV6_FORWARDING, NULL, NULL, NULL); break; #endif diff --git a/sys/netpfil/pf/pf.h b/sys/netpfil/pf/pf.h --- a/sys/netpfil/pf/pf.h +++ b/sys/netpfil/pf/pf.h @@ -163,7 +163,8 @@ #define PFRES_SYNPROXY 14 /* SYN proxy */ #define PFRES_MAPFAILED 15 /* pf_map_addr() failed */ #define PFRES_TRANSLATE 16 /* No translation address available */ -#define PFRES_MAX 17 /* total+1 */ +#define PFRES_NOROUTE 17 /* No route for af-to */ +#define PFRES_MAX 18 /* total+1 */ #define PFRES_NAMES { \ "match", \ @@ -183,6 +184,7 @@ "synproxy", \ "map-failed", \ "translate", \ + "no-route", \ NULL \ } diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -336,10 +336,7 @@ static __inline void pf_dummynet_flag_remove(struct mbuf *m, struct pf_mtag *pf_mtag); static int pf_dummynet(struct pf_pdesc *, struct pf_kstate *, - struct pf_krule *, struct mbuf **); -static int pf_dummynet_route(struct pf_pdesc *, - struct pf_kstate *, struct pf_krule *, - struct ifnet *, const struct sockaddr *, struct mbuf **); + struct pf_krule *); static int pf_test_eth_rule(int, struct pfi_kkif *, struct mbuf **); static int pf_test_rule(struct pf_krule **, struct pf_kstate **, @@ -406,19 +403,13 @@ static void pf_purge_unlinked_rules(void); static int pf_mtag_uminit(void *, int, int); static void pf_mtag_free(struct m_tag *); -static void pf_packet_rework_nat(struct pf_pdesc *, int, - struct pf_state_key *); -#ifdef INET -static int pf_route(struct pf_krule *, - struct ifnet *, struct pf_kstate *, - struct pf_pdesc *, struct inpcb *); -#endif /* INET */ +static int pf_route_to(struct mbuf *, struct pf_kstate *, + const struct pf_krule *, struct pf_rule_actions *, + struct ifnet *, u_int8_t, sa_family_t , + sa_family_t, struct pf_mtag **); #ifdef INET6 static void pf_change_a6(struct pf_addr *, u_int16_t *, struct pf_addr *, u_int8_t); -static int pf_route6(struct pf_krule *, - struct ifnet *, struct pf_kstate *, - struct pf_pdesc *, struct inpcb *); #endif /* INET6 */ static __inline void pf_set_protostate(struct pf_kstate *, int, u_int8_t); @@ -429,16 +420,6 @@ VNET_DEFINE(struct pf_limit, pf_limits[PF_LIMIT_MAX]); -#define PACKET_UNDO_NAT(_pd, _off, _s) \ - do { \ - struct pf_state_key *nk; \ - if ((pd->dir) == PF_OUT) \ - nk = (_s)->key[PF_SK_STACK]; \ - else \ - nk = (_s)->key[PF_SK_WIRE]; \ - pf_packet_rework_nat(_pd, _off, nk); \ - } while (0) - #define PACKET_LOOPED(pd) ((pd)->pf_mtag && \ (pd)->pf_mtag->flags & PF_MTAG_FLAG_PACKET_LOOPED) @@ -617,96 +598,185 @@ } } -static void -pf_packet_rework_nat(struct pf_pdesc *pd, int off, struct pf_state_key *nk) +static int +pf_undo_nat_state(struct mbuf *m, sa_family_t af, struct pf_kstate *st, + u_int8_t dir) { + struct pf_pdesc pd; + struct pf_state_key *nk; + u_short action; + u_short reason; - switch (pd->virtual_proto) { + memset(&pd, 0, sizeof(pd)); + + /* Set up a minimal pd for pf_change_ap */ + pd.dir = dir; + pd.m = m; + pd.sidx = (dir == PF_IN) ? 0 : 1; + pd.didx = (dir == PF_IN) ? 1 : 0; + pd.af = pd.naf = af; + + if (dir == PF_OUT) + nk = st->key[PF_SK_STACK]; + else + nk = st->key[PF_SK_WIRE]; + + MPASS(nk != NULL); + + switch (af) { +#ifdef INET + case AF_INET: { + struct ip *h; + h = mtod(pd.m, struct ip *); + if (pf_walk_header(&pd, h, &reason) != PF_PASS) { + printf("%s: pf_walk_header failed for IPv4\n", __func__); + return (-1); + } + pd.src = (struct pf_addr *)&h->ip_src; + pd.dst = (struct pf_addr *)&h->ip_dst; + pd.ip_sum = &h->ip_sum; + pd.tot_len = ntohs(h->ip_len); + pd.df = h->ip_off & htons(IP_DF); + pd.virtual_proto = (h->ip_off & htons(IP_MF | IP_OFFMASK)) ? + PF_VPROTO_FRAGMENT : pd.proto; + } + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: { + struct ip6_hdr *h; + h = mtod(pd.m, struct ip6_hdr *); + if (pf_walk_header6(&pd, h, &reason) != PF_PASS) { + printf("%s: pf_walk_header failed for IPv6\n", __func__); + return (-1); + } + pd.src = (struct pf_addr *)&h->ip6_src; + pd.dst = (struct pf_addr *)&h->ip6_dst; + pd.ip_sum = NULL; + pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); + pd.virtual_proto = (pd.fragoff != 0) ? + PF_VPROTO_FRAGMENT : pd.proto; + } + break; +#endif /* INET6 */ + } + + switch (pd.virtual_proto) { case IPPROTO_TCP: { - struct tcphdr *th = &pd->hdr.tcp; + struct tcphdr *th = &pd.hdr.tcp; + if (!pf_pull_hdr(pd.m, pd.off, th, sizeof(*th), &action, + &reason, af)) { + printf("%s: pf_pull_hdr failed for TCP\n", __func__); + return (-1); + } - if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af)) - pf_change_ap(pd, pd->src, &th->th_sport, - &nk->addr[pd->sidx], nk->port[pd->sidx]); - if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af)) - pf_change_ap(pd, pd->dst, &th->th_dport, - &nk->addr[pd->didx], nk->port[pd->didx]); - m_copyback(pd->m, off, sizeof(*th), (caddr_t)th); + if (PF_ANEQ(pd.src, &nk->addr[pd.sidx], pd.af)) + pf_change_ap(&pd, pd.src, &th->th_sport, + &nk->addr[pd.sidx], nk->port[pd.sidx]); + if (PF_ANEQ(pd.dst, &nk->addr[pd.didx], pd.af)) + pf_change_ap(&pd, pd.dst, &th->th_dport, + &nk->addr[pd.didx], nk->port[pd.didx]); + m_copyback(pd.m, pd.off, sizeof(*th), (caddr_t)th); break; } case IPPROTO_UDP: { - struct udphdr *uh = &pd->hdr.udp; + struct udphdr *uh = &pd.hdr.udp; + if (!pf_pull_hdr(pd.m, pd.off, uh, sizeof(*uh), &action, + &reason, af)) { + printf("%s: pf_pull_hdr failed for UDP\n", __func__); + return (-1); + } - if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af)) - pf_change_ap(pd, pd->src, &uh->uh_sport, - &nk->addr[pd->sidx], nk->port[pd->sidx]); - if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af)) - pf_change_ap(pd, pd->dst, &uh->uh_dport, - &nk->addr[pd->didx], nk->port[pd->didx]); - m_copyback(pd->m, off, sizeof(*uh), (caddr_t)uh); + if (PF_ANEQ(pd.src, &nk->addr[pd.sidx], pd.af)) + pf_change_ap(&pd, pd.src, &uh->uh_sport, + &nk->addr[pd.sidx], nk->port[pd.sidx]); + if (PF_ANEQ(pd.dst, &nk->addr[pd.didx], pd.af)) + pf_change_ap(&pd, pd.dst, &uh->uh_dport, + &nk->addr[pd.didx], nk->port[pd.didx]); + m_copyback(pd.m, pd.off, sizeof(*uh), (caddr_t)uh); break; } case IPPROTO_SCTP: { - struct sctphdr *sh = &pd->hdr.sctp; - - if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af)) { - pf_change_ap(pd, pd->src, &sh->src_port, - &nk->addr[pd->sidx], nk->port[pd->sidx]); + struct sctphdr *sh = &pd.hdr.sctp; + if (!pf_pull_hdr(pd.m, pd.off, sh, sizeof(*sh), + &action, &reason, af)) { + printf("%s: pf_pull_hdr failed for SCTP\n", __func__); + return (-1); } - if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af)) { - pf_change_ap(pd, pd->dst, &sh->dest_port, - &nk->addr[pd->didx], nk->port[pd->didx]); + + if (PF_ANEQ(pd.src, &nk->addr[pd.sidx], pd.af)) { + pf_change_ap(&pd, pd.src, &sh->src_port, + &nk->addr[pd.sidx], nk->port[pd.sidx]); + } + if (PF_ANEQ(pd.dst, &nk->addr[pd.didx], pd.af)) { + pf_change_ap(&pd, pd.dst, &sh->dest_port, + &nk->addr[pd.didx], nk->port[pd.didx]); } break; } case IPPROTO_ICMP: { - struct icmp *ih = &pd->hdr.icmp; + struct icmp *ih = &pd.hdr.icmp; + if (!pf_pull_hdr(pd.m, pd.off, ih, ICMP_MINLEN, + &action, &reason, af)) { + printf("%s: pf_pull_hdr failed for ICMP\n", __func__); + return (-1); + } - if (nk->port[pd->sidx] != ih->icmp_id) { - pd->hdr.icmp.icmp_cksum = pf_cksum_fixup( + if (nk->port[pd.sidx] != ih->icmp_id) { + pd.hdr.icmp.icmp_cksum = pf_cksum_fixup( ih->icmp_cksum, ih->icmp_id, - nk->port[pd->sidx], 0); - ih->icmp_id = nk->port[pd->sidx]; - pd->sport = &ih->icmp_id; + nk->port[pd.sidx], 0); + ih->icmp_id = nk->port[pd.sidx]; + pd.sport = &ih->icmp_id; - m_copyback(pd->m, off, ICMP_MINLEN, (caddr_t)ih); + m_copyback(pd.m, pd.off, ICMP_MINLEN, (caddr_t)ih); } /* FALLTHROUGH */ } default: - if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af)) { - switch (pd->af) { + if (PF_ANEQ(pd.src, &nk->addr[pd.sidx], pd.af)) { + switch (pd.af) { +#ifdef INET case AF_INET: - pf_change_a(&pd->src->v4.s_addr, - pd->ip_sum, nk->addr[pd->sidx].v4.s_addr, + pf_change_a(&pd.src->v4.s_addr, + pd.ip_sum, nk->addr[pd.sidx].v4.s_addr, 0); break; +#endif /* INET */ +#ifdef INET6 case AF_INET6: - pf_addrcpy(pd->src, &nk->addr[pd->sidx], - pd->af); + pf_addrcpy(pd.src, &nk->addr[pd.sidx], + pd.af); break; +#endif /* INET6 */ default: - unhandled_af(pd->af); + unhandled_af(pd.af); } } - if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af)) { - switch (pd->af) { + if (PF_ANEQ(pd.dst, &nk->addr[pd.didx], pd.af)) { + switch (pd.af) { +#ifdef INET case AF_INET: - pf_change_a(&pd->dst->v4.s_addr, - pd->ip_sum, nk->addr[pd->didx].v4.s_addr, + pf_change_a(&pd.dst->v4.s_addr, + pd.ip_sum, nk->addr[pd.didx].v4.s_addr, 0); break; +#endif /* INET */ +#ifdef INET6 case AF_INET6: - pf_addrcpy(pd->dst, &nk->addr[pd->didx], - pd->af); + pf_addrcpy(pd.dst, &nk->addr[pd.didx], + pd.af); break; +#endif /* INET6 */ default: - unhandled_af(pd->af); + unhandled_af(pd.af); } } break; } + + return (0); } static __inline uint32_t @@ -2401,9 +2471,10 @@ void pf_intr(void *v) { - struct epoch_tracker et; - struct pf_send_head queue; - struct pf_send_entry *pfse, *next; + struct epoch_tracker et; + struct pf_send_head queue; + struct pf_send_entry *pfse, *next; + struct pf_mtag *pf_mtag; CURVNET_SET((struct vnet *)v); @@ -2415,6 +2486,7 @@ NET_EPOCH_ENTER(et); STAILQ_FOREACH_SAFE(pfse, &queue, pfse_next, next) { + pf_mtag = pf_find_mtag(pfse->pfse_m); switch (pfse->pfse_type) { #ifdef INET case PFSE_IP: { @@ -2429,8 +2501,12 @@ pfse->pfse_m->m_pkthdr.csum_data = 0xffff; ip_input(pfse->pfse_m); } else { - ip_output(pfse->pfse_m, NULL, NULL, 0, NULL, - NULL); + if (pf_mtag && + pf_mtag->flags & PF_MTAG_FLAG_ROUTE_TO) + pf_forward4(&(pfse->pfse_m), PF_OUT); + else + ip_output(pfse->pfse_m, NULL, NULL, 0, + NULL, NULL); } break; } @@ -2452,8 +2528,12 @@ pfse->pfse_m->m_pkthdr.csum_data = 0xffff; ip6_input(pfse->pfse_m); } else { - ip6_output(pfse->pfse_m, NULL, NULL, 0, NULL, - NULL, NULL); + if (pf_mtag && + pf_mtag->flags & PF_MTAG_FLAG_ROUTE_TO) + pf_forward6(&(pfse->pfse_m), PF_OUT); + else + ip6_output(pfse->pfse_m, NULL, NULL, 0, + NULL, NULL, NULL); } break; case PFSE_ICMP6: @@ -2793,7 +2873,7 @@ s->key[PF_SK_WIRE]->port[0], s->src.seqhi, s->src.seqlo + 1, TH_RST|TH_ACK, 0, 0, 0, M_SKIP_FIREWALL, s->tag, 0, - s->act.rtableid); + s->act.rtableid, NULL); } LIST_REMOVE(s, entry); @@ -3555,7 +3635,7 @@ } int -pf_translate_af(struct pf_pdesc *pd) +pf_translate_af(struct pf_pdesc *pd, struct pf_kstate *st) { #if defined(INET) && defined(INET6) struct mbuf *mp; @@ -3627,6 +3707,21 @@ return (-1); } + /* + * Af-to performed on `pass in` rules is problematic. + * If an IP address of the outbound interface is used for SNAT + * it would be impossible to return such traffic back through normal + * post-af-to routing. To prevent such issues af-to on inbound rules + * creates a state spanning both inbound and outbound interfaces + * and bypasses outbound filtering. This is the original OpenBSD + * implemmentation. FreeBSD supports af-to on `pass out` rules too, + * then such tricks are unnecessary but additional routing to guide + * pre-af-to packets towards the outbound interface must be installed. + */ + if (st->direction == PF_IN) { + pd->m->m_flags |= M_SKIP_FIREWALL; + } + /* recalculate icmp/icmp6 checksums */ if (pd->proto == IPPROTO_ICMP || pd->proto == IPPROTO_ICMPV6) { int off; @@ -4028,6 +4123,7 @@ struct tcphdr *th; char *opt; struct pf_mtag *pf_mtag; + struct m_tag *pfil_mtag; len = 0; th = NULL; @@ -4069,6 +4165,18 @@ pf_mtag->tag = mtag_tag; pf_mtag->flags = mtag_flags; + /* + * The recreated mbuf must behave as if it has been through pfil + * loop protection. + */ + pfil_mtag = m_tag_alloc(MTAG_PFIL, MTAG_PFIL_NEXT_HOOK, sizeof(void*), + M_ZERO | M_NOWAIT); + if (pfil_mtag == NULL) { + m_freem(m); + return (NULL); + } + m_tag_prepend(m, pfil_mtag); + if (rtableid >= 0) M_SETFIB(m, rtableid); @@ -4281,13 +4389,16 @@ const struct pf_addr *saddr, const struct pf_addr *daddr, u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, u_int8_t tcp_flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, - int mbuf_flags, u_int16_t mtag_tag, u_int16_t mtag_flags, int rtableid) + int mbuf_flags, u_int16_t mtag_tag, u_int16_t mtag_flags, int rtableid, + struct pf_kstate *st) { - struct pf_send_entry *pfse; - struct mbuf *m; + struct pf_send_entry *pfse; + struct mbuf *m; + struct pf_mtag *pf_mtag; m = pf_build_tcp(r, af, saddr, daddr, sport, dport, seq, ack, tcp_flags, win, mss, ttl, mbuf_flags, mtag_tag, mtag_flags, 0, rtableid); + if (m == NULL) return; @@ -4298,6 +4409,22 @@ return; } + /* + * State is given for sending out packets created by synproxy. + * We are sending packets recreated by pf, pretending to be packets + * coming from the source, so treat them as if they are incoming + * and already filtered. + */ + if (st != NULL && (st->act.rt == PF_ROUTETO || st->act.rt == PF_REPLYTO)) { + if (pf_route_to(m, st, r, &(st->act), NULL, st->direction, + af, af, &pf_mtag)) { + m_freem(m); + return; + } + if (st->direction == PF_OUT) + pf_skip_hook(m, af, st->direction); + } + switch (af) { #ifdef INET case AF_INET: @@ -4358,7 +4485,8 @@ pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport, th->th_sport, ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0, - r->return_ttl, M_SKIP_FIREWALL, 0, 0, rtableid); + r->return_ttl, M_SKIP_FIREWALL, 0, 0, rtableid, + NULL); } } else if (pd->proto == IPPROTO_SCTP && (r->rule_flag & PFRULE_RETURN)) { @@ -4423,7 +4551,7 @@ pf_send_tcp(s->rule, pd->af, pd->dst, pd->src, pd->hdr.tcp.th_dport, pd->hdr.tcp.th_sport, dst->seqlo, src->seqlo, TH_ACK, 0, 0, s->rule->return_ttl, 0, 0, 0, - s->rule->rtableid); + s->rule->rtableid, s); } static void @@ -4471,7 +4599,7 @@ M_SETFIB(m0, rtableid); #ifdef ALTQ - if (r->qid) { + if (r && r->qid) { pf_mtag->qid = r->qid; /* add hints for ecn */ pf_mtag->hdr = mtod(m0, struct ip *); @@ -5244,17 +5372,6 @@ SDT_PROBE3(pf, eth, test_rule, entry, dir, kif->pfik_ifp, m); mtag = pf_find_mtag(m); - if (mtag != NULL && mtag->flags & PF_MTAG_FLAG_DUMMYNET) { - /* Dummynet re-injects packets after they've - * completed their delay. We've already - * processed them, so pass unconditionally. */ - - /* But only once. We may see the packet multiple times (e.g. - * PFIL_IN/PFIL_OUT). */ - pf_dummynet_flag_remove(m, mtag); - - return (PF_PASS); - } if (__predict_false(m->m_len < sizeof(struct ether_header)) && (m = *m0 = m_pullup(*m0, sizeof(struct ether_header))) == NULL) { @@ -5472,8 +5589,6 @@ } PF_RULES_RUNLOCK(); - - mtag->flags |= PF_MTAG_FLAG_DUMMYNET; ip_dn_io_ptr(m0, &dnflow); if (*m0 != NULL) pf_dummynet_flag_remove(m, mtag); @@ -6253,7 +6368,7 @@ pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport, th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1, TH_SYN|TH_ACK, 0, s->src.mss, 0, M_SKIP_FIREWALL, 0, 0, - pd->act.rtableid); + pd->act.rtableid, NULL); REASON_SET(&ctx->reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } @@ -6843,7 +6958,7 @@ th->th_sport, ntohl(th->th_ack), 0, TH_RST, 0, 0, state->rule->return_ttl, M_SKIP_FIREWALL, - 0, 0, state->act.rtableid); + 0, 0, state->act.rtableid, NULL); src->seqlo = 0; src->seqhi = 1; src->max_win = 1; @@ -6968,7 +7083,7 @@ pd->src, th->th_dport, th->th_sport, state->src.seqhi, ntohl(th->th_seq) + 1, TH_SYN|TH_ACK, 0, state->src.mss, 0, - M_SKIP_FIREWALL, 0, 0, state->act.rtableid); + M_SKIP_FIREWALL, 0, 0, state->act.rtableid, NULL); REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } else if ((tcp_get_flags(th) & (TH_ACK|TH_RST|TH_FIN)) != TH_ACK || @@ -7001,7 +7116,7 @@ state->dst.seqhi, 0, TH_SYN, 0, state->src.mss, 0, state->orig_kif->pfik_ifp == V_loif ? M_LOOP : 0, - state->tag, 0, state->act.rtableid); + state->tag, 0, state->act.rtableid, state); REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } else if (((tcp_get_flags(th) & (TH_SYN|TH_ACK)) != @@ -7016,13 +7131,13 @@ pd->src, th->th_dport, th->th_sport, ntohl(th->th_ack), ntohl(th->th_seq) + 1, TH_ACK, state->src.max_win, 0, 0, 0, - state->tag, 0, state->act.rtableid); + state->tag, 0, state->act.rtableid, state); pf_send_tcp(state->rule, pd->af, &sk->addr[pd->sidx], &sk->addr[pd->didx], sk->port[pd->sidx], sk->port[pd->didx], state->src.seqhi + 1, state->src.seqlo + 1, TH_ACK, state->dst.max_win, 0, 0, - M_SKIP_FIREWALL, 0, 0, state->act.rtableid); + M_SKIP_FIREWALL, 0, 0, state->act.rtableid, NULL); state->src.seqdiff = state->dst.seqhi - state->src.seqlo; state->dst.seqdiff = state->src.seqhi - @@ -8928,232 +9043,97 @@ return (0); } +/* + * Pf equivalent of ip_forward() and ip_output(). Used only in route-to + * and af-to cases, either directly or when reinjecting from dummynet. + */ #ifdef INET -static int -pf_route(struct pf_krule *r, struct ifnet *oifp, - struct pf_kstate *s, struct pf_pdesc *pd, struct inpcb *inp) + void +pf_forward4(struct mbuf **m, u_int8_t dir) { - struct mbuf *m0, *m1, *md; struct route ro; - const struct sockaddr *gw = &ro.ro_dst; - struct sockaddr_in *dst; + struct mbuf *m0; + struct ifnet *ifp; struct ip *ip; - struct ifnet *ifp = NULL; + struct sockaddr *gw = &ro.ro_dst; + struct pf_mtag *pf_mtag; + struct pf_kstate *st; int error = 0; uint16_t ip_len, ip_off; - uint16_t tmp; - int r_dir; - bool skip_test = false; - int action = PF_PASS; - KASSERT(pd->m && r && oifp, ("%s: invalid parameters", __func__)); + NET_EPOCH_ASSERT(); - SDT_PROBE4(pf, ip, route_to, entry, pd->m, pd, s, oifp); +#ifdef INVARIANTS + pf_mtag = pf_find_mtag(*m); +#endif /* INVARIANTS */ + MPASS(pf_mtag != NULL); + KASSERT(pf_mtag->flags & PF_MTAG_FLAG_ROUTE_TO, ("Called for non-route output")); - if (s) { - r_dir = s->direction; - } else { - r_dir = r->direction; + /* Routing for packets tagged in direction PF_IN */ + pf_mtag = pf_find_mtag(*m); + ifp = ifnet_byindexgen(pf_mtag->if_index, pf_mtag->if_idxgen); + if (ifp == NULL || ifp->if_flags & IFF_DYING) { + goto bad; } + if (ifp->if_flags & IFF_LOOPBACK) + (*m)->m_flags |= M_SKIP_FIREWALL; - KASSERT(pd->dir == PF_IN || pd->dir == PF_OUT || - r_dir == PF_IN || r_dir == PF_OUT, ("%s: invalid direction", - __func__)); - - if ((pd->pf_mtag == NULL && - ((pd->pf_mtag = pf_get_mtag(pd->m)) == NULL)) || - pd->pf_mtag->routed++ > 3) { - m0 = pd->m; - pd->m = NULL; - SDT_PROBE1(pf, ip, route_to, drop, __LINE__); - action = PF_DROP; - goto bad_locked; - } - - if (pd->act.rt_kif != NULL) - ifp = pd->act.rt_kif->pfik_ifp; - - if (pd->act.rt == PF_DUPTO) { - if ((pd->pf_mtag->flags & PF_MTAG_FLAG_DUPLICATED)) { - if (s != NULL) { - PF_STATE_UNLOCK(s); - } - if (ifp == oifp) { - /* When the 2nd interface is not skipped */ - return (action); - } else { - m0 = pd->m; - pd->m = NULL; - SDT_PROBE1(pf, ip, route_to, drop, __LINE__); - action = PF_DROP; - goto bad; - } - } else { - pd->pf_mtag->flags |= PF_MTAG_FLAG_DUPLICATED; - if (((m0 = m_dup(pd->m, M_NOWAIT)) == NULL)) { - if (s) - PF_STATE_UNLOCK(s); - return (action); - } + /* + * Continue with inbound pfil hooks. + * pf_test() won't be repeated thanks to the MTAG_PFIL_NEXT_HOOK tag. + */ + if (dir == PF_IN) { + if (pfil_mbuf_in(V_inet_pfil_head, m, + (*m)->m_pkthdr.rcvif, NULL) != PFIL_PASS) { + return; } - } else { - if ((pd->act.rt == PF_REPLYTO) == (r_dir == pd->dir)) { - if (pd->af == pd->naf) { - pf_dummynet(pd, s, r, &pd->m); - if (s) - PF_STATE_UNLOCK(s); - return (action); - } else { - if (r_dir == PF_IN) { - skip_test = true; - } - } - } - - /* - * If we're actually doing route-to and af-to and are in the - * reply direction. - */ - if (pd->act.rt_kif && pd->act.rt_kif->pfik_ifp && - pd->af != pd->naf) { - if (pd->act.rt == PF_ROUTETO && r->naf != AF_INET) { - /* Un-set ifp so we do a plain route lookup. */ - ifp = NULL; - } - if (pd->act.rt == PF_REPLYTO && r->naf != AF_INET6) { - /* Un-set ifp so we do a plain route lookup. */ - ifp = NULL; - } - } - m0 = pd->m; - } - - ip = mtod(m0, struct ip *); - - bzero(&ro, sizeof(ro)); - dst = (struct sockaddr_in *)&ro.ro_dst; - dst->sin_family = AF_INET; - dst->sin_len = sizeof(struct sockaddr_in); - dst->sin_addr.s_addr = pd->act.rt_addr.v4.s_addr; - - if (pd->dir == PF_IN) { + ip = mtod(*m, struct ip *); if (ip->ip_ttl <= IPTTLDEC) { - if (r->rt != PF_DUPTO) - pf_send_icmp(m0, ICMP_TIMXCEED, - ICMP_TIMXCEED_INTRANS, 0, pd->af, r, - pd->act.rtableid); - action = PF_DROP; - goto bad_locked; + pf_send_icmp(*m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, + 0, AF_INET, NULL, M_GETFIB(*m)); + goto bad; } ip->ip_ttl -= IPTTLDEC; } - if (s != NULL) { - if (ifp == NULL && (pd->af != pd->naf)) { - /* We're in the AFTO case. Do a route lookup. */ - const struct nhop_object *nh; - nh = fib4_lookup(M_GETFIB(m0), ip->ip_dst, 0, NHR_NONE, 0); - if (nh) { - ifp = nh->nh_ifp; - - /* Use the gateway if needed. */ - if (nh->nh_flags & NHF_GATEWAY) { - gw = &nh->gw_sa; - ro.ro_flags |= RT_HAS_GW; - } else { - dst->sin_addr = ip->ip_dst; - } - - /* - * Bind to the correct interface if we're - * if-bound. We don't know which interface - * that will be until here, so we've inserted - * the state on V_pf_all. Fix that now. - */ - if (s->kif == V_pfi_all && ifp != NULL && - r->rule_flag & PFRULE_IFBOUND) - s->kif = ifp->if_pf_kif; - } - } - - if (r->rule_flag & PFRULE_IFBOUND && - pd->act.rt == PF_REPLYTO && - s->kif == V_pfi_all) { - s->kif = pd->act.rt_kif; - s->orig_kif = oifp->if_pf_kif; - } - - PF_STATE_UNLOCK(s); + /* + * Forward through outbound pfil hooks. This will cover outbound + * filtering by pf. pf_test() called by the outbound pfil hook will + * call pf_forward4() on its own and send the packet out. Unless + * there is "set skip" on the outbound interface or the mbuf is tagged + * with M_SKIP_FIREWALL, then the inbound pf_forward4() will send + * the packet out. + */ + if (pfil_mbuf_out(V_inet_pfil_head, m, ifp, NULL) != PFIL_PASS) { + return; } - if (ifp == NULL) { - m0 = pd->m; - pd->m = NULL; - action = PF_DROP; - SDT_PROBE1(pf, ip, route_to, drop, __LINE__); + /* Routing for packets tagged in direction PF_OUT */ + pf_mtag = pf_find_mtag(*m); + ifp = ifnet_byindexgen(pf_mtag->if_index, pf_mtag->if_idxgen); + if (ifp == NULL || ifp->if_flags & IFF_DYING) { goto bad; } - - if (r->rt == PF_DUPTO) - skip_test = true; - - if (pd->dir == PF_IN && !skip_test) { - if (pf_test(AF_INET, PF_OUT, PFIL_FWD, ifp, &m0, inp, - &pd->act) != PF_PASS) { - action = PF_DROP; - SDT_PROBE1(pf, ip, route_to, drop, __LINE__); - goto bad; - } else if (m0 == NULL) { - action = PF_DROP; - SDT_PROBE1(pf, ip, route_to, drop, __LINE__); - goto done; - } - if (m0->m_len < sizeof(struct ip)) { - DPFPRINTF(PF_DEBUG_URGENT, - "%s: m0->m_len < sizeof(struct ip)", __func__); - SDT_PROBE1(pf, ip, route_to, drop, __LINE__); - action = PF_DROP; - goto bad; - } - ip = mtod(m0, struct ip *); - } - if (ifp->if_flags & IFF_LOOPBACK) - m0->m_flags |= M_SKIP_FIREWALL; + (*m)->m_flags |= M_SKIP_FIREWALL; + bzero(&ro, sizeof(ro)); + memcpy(gw, &(pf_mtag->dst), sizeof(struct sockaddr_in)); + ro.ro_flags = pf_mtag->ro_flags; + + ip = mtod(*m, struct ip *); ip_len = ntohs(ip->ip_len); ip_off = ntohs(ip->ip_off); /* Copied from FreeBSD 10.0-CURRENT ip_output. */ - m0->m_pkthdr.csum_flags |= CSUM_IP; - if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) { - in_delayed_cksum(m0); - m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; + (*m)->m_pkthdr.csum_flags |= CSUM_IP; + if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) { + in_delayed_cksum(*m); + (*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } - if (m0->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) { - pf_sctp_checksum(m0, (uint32_t)(ip->ip_hl << 2)); - m0->m_pkthdr.csum_flags &= ~CSUM_SCTP; - } - - if (pd->dir == PF_IN) { - /* - * Make sure dummynet gets the correct direction, in case it needs to - * re-inject later. - */ - pd->dir = PF_OUT; - - /* - * The following processing is actually the rest of the inbound processing, even - * though we've marked it as outbound (so we don't look through dummynet) and it - * happens after the outbound processing (pf_test(PF_OUT) above). - * Swap the dummynet pipe numbers, because it's going to come to the wrong - * conclusion about what direction it's processing, and we can't fix it or it - * will re-inject incorrectly. Swapping the pipe numbers means that its incorrect - * decision will pick the right pipe, and everything will mostly work as expected. - */ - tmp = pd->act.dnrpipe; - pd->act.dnrpipe = pd->act.dnpipe; - pd->act.dnpipe = tmp; + if ((*m)->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) { + pf_sctp_checksum((*m), (uint32_t)(ip->ip_hl << 2)); + (*m)->m_pkthdr.csum_flags &= ~CSUM_SCTP; } /* @@ -9161,371 +9141,432 @@ * care of the fragmentation for us, we can just send directly. */ if (ip_len <= ifp->if_mtu || - (m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0) { + ((*m)->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0) { ip->ip_sum = 0; - if (m0->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) { - ip->ip_sum = in_cksum(m0, ip->ip_hl << 2); - m0->m_pkthdr.csum_flags &= ~CSUM_IP; - } - m_clrprotoflags(m0); /* Avoid confusing lower layers. */ - - md = m0; - error = pf_dummynet_route(pd, s, r, ifp, gw, &md); - if (md != NULL) { - error = (*ifp->if_output)(ifp, md, gw, &ro); - SDT_PROBE2(pf, ip, route_to, output, ifp, error); + if ((*m)->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) { + ip->ip_sum = in_cksum(*m, ip->ip_hl << 2); + (*m)->m_pkthdr.csum_flags &= ~CSUM_IP; } + m_clrprotoflags(*m); /* Avoid confusing lower layers. */ + error = (*ifp->if_output)(ifp, *m, gw, &ro); + SDT_PROBE2(pf, ip, route_to, output, ifp, error); goto done; } /* Balk when DF bit is set or the interface didn't support TSO. */ - if ((ip_off & IP_DF) || (m0->m_pkthdr.csum_flags & CSUM_TSO)) { + if ((ip_off & IP_DF) || ((*m)->m_pkthdr.csum_flags & CSUM_TSO)) { error = EMSGSIZE; KMOD_IPSTAT_INC(ips_cantfrag); - if (pd->act.rt != PF_DUPTO) { - if (s && s->nat_rule != NULL) { - MPASS(m0 == pd->m); - PACKET_UNDO_NAT(pd, - (ip->ip_hl << 2) + (ip_off & IP_OFFMASK), - s); - } - - pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, - ifp->if_mtu, pd->af, r, pd->act.rtableid); + st = pf_find_state_byid(pf_mtag->state_id, + pf_mtag->state_creatorid); + if (st != NULL && st->nat_rule != NULL && + pf_undo_nat_state(*m, AF_INET, st, PF_OUT) != -1) { + PF_STATE_UNLOCK(st); + printf("%s: sending ICMP_UNREACH_NEEDFRAG\n", __func__); + pf_send_icmp(*m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, + ifp->if_mtu, AF_INET, NULL, M_GETFIB(*m)); } + SDT_PROBE1(pf, ip, route_to, drop, __LINE__); - action = PF_DROP; goto bad; } - error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist); + error = ip_fragment(ip, m, ifp->if_mtu, ifp->if_hwassist); if (error) { SDT_PROBE1(pf, ip, route_to, drop, __LINE__); - action = PF_DROP; goto bad; } - for (; m0; m0 = m1) { - m1 = m0->m_nextpkt; - m0->m_nextpkt = NULL; + for (; (*m); (*m) = m0) { + m0 = (*m)->m_nextpkt; + (*m)->m_nextpkt = 0; if (error == 0) { m_clrprotoflags(m0); - md = m0; - pd->pf_mtag = pf_find_mtag(md); - error = pf_dummynet_route(pd, s, r, ifp, - gw, &md); - if (md != NULL) { - error = (*ifp->if_output)(ifp, md, gw, &ro); - SDT_PROBE2(pf, ip, route_to, output, ifp, error); - } + error = (*ifp->if_output)(ifp, *m, gw, &ro); + SDT_PROBE2(pf, ip, route_to, output, ifp, error); } else - m_freem(m0); + goto bad; } if (error == 0) KMOD_IPSTAT_INC(ips_fragmented); done: - if (pd->act.rt != PF_DUPTO) - pd->m = NULL; - else - action = PF_PASS; - return (action); - -bad_locked: - if (s) - PF_STATE_UNLOCK(s); + *m = NULL; + return; bad: - m_freem(m0); - goto done; + m_freem(*m); + goto done; } #endif /* INET */ +/* + * Pf equivalent of ip6_forward() and ip6_output(). Used only in route-to + * and af-to cases, either directly or when reinjecting from dummynet. + */ #ifdef INET6 -static int -pf_route6(struct pf_krule *r, struct ifnet *oifp, - struct pf_kstate *s, struct pf_pdesc *pd, struct inpcb *inp) +void +pf_forward6(struct mbuf **m, u_int8_t dir) { - struct mbuf *m0, *md; - struct m_tag *mtag; - struct sockaddr_in6 dst; + struct ifnet *ifp; struct ip6_hdr *ip6; - struct ifnet *ifp = NULL; - int r_dir; - bool skip_test = false; - int action = PF_PASS; + struct sockaddr_in6 *gw; + struct pf_kstate *st; + struct pf_mtag *pf_mtag; + struct m_tag *reass_mtag; - KASSERT(pd->m && r && oifp, ("%s: invalid parameters", __func__)); + NET_EPOCH_ASSERT(); - SDT_PROBE4(pf, ip6, route_to, entry, pd->m, pd, s, oifp); +#ifdef INVARIANTS + pf_mtag = pf_find_mtag(*m); +#endif /* INVARIANTS */ + MPASS(pf_mtag != NULL); + KASSERT(pf_mtag->flags & PF_MTAG_FLAG_ROUTE_TO, ("Called for non-route output")); - if (s) { - r_dir = s->direction; - } else { - r_dir = r->direction; + /* Routing for packets tagged in PF_IN */ + pf_mtag = pf_find_mtag(*m); + ifp = ifnet_byindexgen(pf_mtag->if_index, pf_mtag->if_idxgen); + if (ifp == NULL || ifp->if_flags & IFF_DYING) { + goto bad; } + if (ifp->if_flags & IFF_LOOPBACK) + (*m)->m_flags |= M_SKIP_FIREWALL; - KASSERT(pd->dir == PF_IN || pd->dir == PF_OUT || - r_dir == PF_IN || r_dir == PF_OUT, ("%s: invalid direction", - __func__)); - - if ((pd->pf_mtag == NULL && - ((pd->pf_mtag = pf_get_mtag(pd->m)) == NULL)) || - pd->pf_mtag->routed++ > 3) { - m0 = pd->m; - pd->m = NULL; - action = PF_DROP; - SDT_PROBE1(pf, ip6, route_to, drop, __LINE__); - goto bad_locked; - } - - if (pd->act.rt_kif != NULL) - ifp = pd->act.rt_kif->pfik_ifp; - - if (pd->act.rt == PF_DUPTO) { - if ((pd->pf_mtag->flags & PF_MTAG_FLAG_DUPLICATED)) { - if (s != NULL) { - PF_STATE_UNLOCK(s); - } - if (ifp == oifp) { - /* When the 2nd interface is not skipped */ - return (action); - } else { - m0 = pd->m; - pd->m = NULL; - action = PF_DROP; - SDT_PROBE1(pf, ip6, route_to, drop, __LINE__); - goto bad; - } - } else { - pd->pf_mtag->flags |= PF_MTAG_FLAG_DUPLICATED; - if (((m0 = m_dup(pd->m, M_NOWAIT)) == NULL)) { - if (s) - PF_STATE_UNLOCK(s); - return (action); - } + /* + * Continue with inbound pfil hooks. + * pf_test() won't be repeated thanks to the MTAG_PFIL_NEXT_HOOK tag. + */ + if (dir == PF_IN) { + if (pfil_mbuf_in(V_inet6_pfil_head, m, + (*m)->m_pkthdr.rcvif, NULL) != PFIL_PASS) { + return; } - } else { - if ((pd->act.rt == PF_REPLYTO) == (r_dir == pd->dir)) { - if (pd->af == pd->naf) { - pf_dummynet(pd, s, r, &pd->m); - if (s) - PF_STATE_UNLOCK(s); - return (action); - } else { - if (r_dir == PF_IN) { - skip_test = true; - } - } - } - - /* - * If we're actually doing route-to and af-to and are in the - * reply direction. - */ - if (pd->act.rt_kif && pd->act.rt_kif->pfik_ifp && - pd->af != pd->naf) { - if (pd->act.rt == PF_ROUTETO && r->naf != AF_INET6) { - /* Un-set ifp so we do a plain route lookup. */ - ifp = NULL; - } - if (pd->act.rt == PF_REPLYTO && r->naf != AF_INET) { - /* Un-set ifp so we do a plain route lookup. */ - ifp = NULL; - } - } - m0 = pd->m; - } - - ip6 = mtod(m0, struct ip6_hdr *); - - bzero(&dst, sizeof(dst)); - dst.sin6_family = AF_INET6; - dst.sin6_len = sizeof(dst); - pf_addrcpy((struct pf_addr *)&dst.sin6_addr, &pd->act.rt_addr, - AF_INET6); - - if (pd->dir == PF_IN) { + ip6 = mtod(*m, struct ip6_hdr *); if (ip6->ip6_hlim <= IPV6_HLIMDEC) { - if (r->rt != PF_DUPTO) - pf_send_icmp(m0, ICMP6_TIME_EXCEEDED, - ICMP6_TIME_EXCEED_TRANSIT, 0, pd->af, r, - pd->act.rtableid); - action = PF_DROP; - goto bad_locked; + pf_send_icmp(*m, ICMP6_TIME_EXCEEDED, + ICMP6_TIME_EXCEED_TRANSIT, 0, AF_INET6, NULL, + M_GETFIB(*m)); + goto bad; } ip6->ip6_hlim -= IPV6_HLIMDEC; } - if (s != NULL) { - if (ifp == NULL && (pd->af != pd->naf)) { - const struct nhop_object *nh; - nh = fib6_lookup(M_GETFIB(m0), &ip6->ip6_dst, 0, NHR_NONE, 0); - if (nh) { - ifp = nh->nh_ifp; - - /* Use the gateway if needed. */ - if (nh->nh_flags & NHF_GATEWAY) - bcopy(&nh->gw6_sa.sin6_addr, &dst.sin6_addr, - sizeof(dst.sin6_addr)); - else - dst.sin6_addr = ip6->ip6_dst; - - /* - * Bind to the correct interface if we're - * if-bound. We don't know which interface - * that will be until here, so we've inserted - * the state on V_pf_all. Fix that now. - */ - if (s->kif == V_pfi_all && ifp != NULL && - r->rule_flag & PFRULE_IFBOUND) - s->kif = ifp->if_pf_kif; - } - } - - if (r->rule_flag & PFRULE_IFBOUND && - pd->act.rt == PF_REPLYTO && - s->kif == V_pfi_all) { - s->kif = pd->act.rt_kif; - s->orig_kif = oifp->if_pf_kif; - } - - PF_STATE_UNLOCK(s); + /* + * Forward through outbound pfil hooks. This will cover outbound + * filtering by pf. pf_test() called by the outbound pfil hook will + * call pf_forward6() on its own and send the packet out. Unless + * there is "set skip" on the outbound interface or the mbuf is tagged + * with M_SKIP_FIREWALL, then the inbound pf_forward6() will send + * the packet out. + */ + if (pfil_mbuf_out(V_inet6_pfil_head, m, ifp, NULL) != PFIL_PASS) { + return; } - if (pd->af != pd->naf) { - struct udphdr *uh = &pd->hdr.udp; - - if (pd->proto == IPPROTO_UDP && uh->uh_sum == 0) { - uh->uh_sum = in6_cksum_pseudo(ip6, - ntohs(uh->uh_ulen), IPPROTO_UDP, 0); - m_copyback(m0, pd->off, sizeof(*uh), pd->hdr.any); - } - } - - if (ifp == NULL) { - m0 = pd->m; - pd->m = NULL; - action = PF_DROP; - SDT_PROBE1(pf, ip6, route_to, drop, __LINE__); + /* Routing for packets tagged in direction PF_OUT */ + pf_mtag = pf_find_mtag(*m); + ifp = ifnet_byindexgen(pf_mtag->if_index, pf_mtag->if_idxgen); + if (ifp == NULL || ifp->if_flags & IFF_DYING) { goto bad; } - - if (r->rt == PF_DUPTO) - skip_test = true; - - if (pd->dir == PF_IN && !skip_test) { - if (pf_test(AF_INET6, PF_OUT, PFIL_FWD | PF_PFIL_NOREFRAGMENT, - ifp, &m0, inp, &pd->act) != PF_PASS) { - action = PF_DROP; - SDT_PROBE1(pf, ip6, route_to, drop, __LINE__); - goto bad; - } else if (m0 == NULL) { - action = PF_DROP; - SDT_PROBE1(pf, ip6, route_to, drop, __LINE__); - goto done; - } - if (m0->m_len < sizeof(struct ip6_hdr)) { - DPFPRINTF(PF_DEBUG_URGENT, - "%s: m0->m_len < sizeof(struct ip6_hdr)", - __func__); - action = PF_DROP; - SDT_PROBE1(pf, ip6, route_to, drop, __LINE__); - goto bad; - } - ip6 = mtod(m0, struct ip6_hdr *); - } - if (ifp->if_flags & IFF_LOOPBACK) - m0->m_flags |= M_SKIP_FIREWALL; + (*m)->m_flags |= M_SKIP_FIREWALL; - if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6 & + gw = (struct sockaddr_in6*)(&(pf_mtag->dst)); + + ip6 = mtod(*m, struct ip6_hdr *); + + if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6 & ~ifp->if_hwassist) { - uint32_t plen = m0->m_pkthdr.len - sizeof(*ip6); - in6_delayed_cksum(m0, plen, sizeof(struct ip6_hdr)); - m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6; - } - - if (pd->dir == PF_IN) { - uint16_t tmp; - /* - * Make sure dummynet gets the correct direction, in case it needs to - * re-inject later. - */ - pd->dir = PF_OUT; - - /* - * The following processing is actually the rest of the inbound processing, even - * though we've marked it as outbound (so we don't look through dummynet) and it - * happens after the outbound processing (pf_test(PF_OUT) above). - * Swap the dummynet pipe numbers, because it's going to come to the wrong - * conclusion about what direction it's processing, and we can't fix it or it - * will re-inject incorrectly. Swapping the pipe numbers means that its incorrect - * decision will pick the right pipe, and everything will mostly work as expected. - */ - tmp = pd->act.dnrpipe; - pd->act.dnrpipe = pd->act.dnpipe; - pd->act.dnpipe = tmp; + uint32_t plen = (*m)->m_pkthdr.len - sizeof(*ip6); + in6_delayed_cksum(*m, plen, sizeof(struct ip6_hdr)); + (*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6; } /* * If the packet is too large for the outgoing interface, * send back an icmp6 error. */ - if (IN6_IS_SCOPE_EMBED(&dst.sin6_addr)) - dst.sin6_addr.s6_addr16[1] = htons(ifp->if_index); - mtag = m_tag_find(m0, PACKET_TAG_PF_REASSEMBLED, NULL); - if (mtag != NULL) { + if (IN6_IS_SCOPE_EMBED(&(gw->sin6_addr))) + gw->sin6_addr.s6_addr16[1] = htons(ifp->if_index); + reass_mtag = m_tag_find(*m, PACKET_TAG_PF_REASSEMBLED, NULL); + if (reass_mtag != NULL) { int ret __sdt_used; - ret = pf_refragment6(ifp, &m0, mtag, ifp, true); + ret = pf_refragment6(ifp, m, reass_mtag, ifp, + PF_REFRAGMENT6_PF_FORWARD); SDT_PROBE2(pf, ip6, route_to, output, ifp, ret); goto done; } - if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) { - md = m0; - pf_dummynet_route(pd, s, r, ifp, sintosa(&dst), &md); - if (md != NULL) { - int ret __sdt_used; - ret = nd6_output_ifp(ifp, ifp, md, &dst, NULL); - SDT_PROBE2(pf, ip6, route_to, output, ifp, ret); - } + if ((u_long)(*m)->m_pkthdr.len <= ifp->if_mtu) { + int ret __sdt_used; + ret = nd6_output_ifp(ifp, ifp, *m, gw, NULL); + SDT_PROBE2(pf, ip6, route_to, output, ifp, ret); } else { in6_ifstat_inc(ifp, ifs6_in_toobig); - if (pd->act.rt != PF_DUPTO) { - if (s && s->nat_rule != NULL) { - MPASS(m0 == pd->m); - PACKET_UNDO_NAT(pd, - ((caddr_t)ip6 - m0->m_data) + - sizeof(struct ip6_hdr), s); - } - - if (r->rt != PF_DUPTO) - pf_send_icmp(m0, ICMP6_PACKET_TOO_BIG, 0, - ifp->if_mtu, pd->af, r, pd->act.rtableid); + st = pf_find_state_byid(pf_mtag->state_id, + pf_mtag->state_creatorid); + if (st != NULL && st->nat_rule != NULL && + pf_undo_nat_state(*m, AF_INET6, st, PF_OUT) != -1) { + PF_STATE_UNLOCK(st); + printf("%s: sending ICMP6_PACKET_TOO_BIG\n", __func__); + pf_send_icmp(*m, ICMP6_PACKET_TOO_BIG, 0, + ifp->if_mtu, AF_INET6, NULL, M_GETFIB((*m))); } - action = PF_DROP; + SDT_PROBE1(pf, ip6, route_to, drop, __LINE__); goto bad; } done: - if (pd->act.rt != PF_DUPTO) - pd->m = NULL; - else - action = PF_PASS; - return (action); - -bad_locked: - if (s) - PF_STATE_UNLOCK(s); + *m = NULL; + return; bad: - m_freem(m0); + m_freem(*m); goto done; } #endif /* INET6 */ +/* + * Tag packet with routing information from pf_rule_actions. + * Used by pf_route_to() and pf_dup_to(). + */ +static __inline void +pf_route_tag_to(struct pf_kstate *st, struct pf_mtag *pf_mtag, + struct pf_rule_actions *act, sa_family_t naf) +{ + struct sockaddr_in *gw4 = (struct sockaddr_in *)(&pf_mtag->dst); + struct sockaddr_in6 *gw6 = (struct sockaddr_in6 *)(&pf_mtag->dst); + + MPASS(pf_mtag != NULL); + MPASS(act != NULL); + MPASS(act->rt_kif != NULL); + + pf_mtag->flags |= PF_MTAG_FLAG_ROUTE_TO; + pf_mtag->if_index = act->rt_kif->pfik_ifp->if_index; + pf_mtag->if_idxgen = act->rt_kif->pfik_ifp->if_idxgen; + if (st && st->direction == PF_OUT) { + /* For pf_undo_nat_state and sending out ICMP errors */ + pf_mtag->state_id = st->id; + pf_mtag->state_creatorid = st->creatorid; + } + + switch (naf) { +#ifdef INET + case AF_INET: + gw4->sin_family = AF_INET; + gw4->sin_len = sizeof(struct sockaddr_in); + gw4->sin_addr.s_addr = act->rt_addr.v4.s_addr; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + gw6->sin6_family = AF_INET6; + gw6->sin6_len = sizeof(struct sockaddr_in6); + memcpy(&(gw6->sin6_addr), act->rt_addr.v6.s6_addr, + sizeof(struct in6_addr)); + break; +#endif /* INET6 */ + } +} + +/* + * Duplicate a packet, send it over specified gateway and interface. + * Outbound filtering and dummynet are skipped, this is to be like port + * mirroring on a switch. + */ +static void +pf_dup_to(struct pf_pdesc *pd, struct pf_kstate *st) +{ + struct mbuf *md; + struct pf_mtag *md_mtag; + + md = m_dup(pd->m, M_NOWAIT); + if (md == NULL) { + /* + * Don't communicate the error to caller. + * The original packet might still get forwarded just fine. + */ + return; + } + + md->m_flags |= M_SKIP_FIREWALL; + md_mtag = pf_get_mtag(md); + if (md_mtag == NULL) { + /* + * Don't communicate the error to caller. + * The original packet might still get forwarded just fine. + */ + m_freem(md); + return; + } + + pf_route_tag_to(st, md_mtag, &(pd->act), pd->naf); + switch(pd->naf) { +#ifdef INET + case AF_INET: + pf_forward4(&md, PF_OUT); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + pf_forward6(&md, PF_OUT); + break; +#endif /* INET6 */ + default: + unhandled_af(pd->naf); + } +} + +/* + * Tag packet for sending it over specific gateway and interface. + * We can't use pd here for all attributes because the function can be called + * from pf_send_tcp() where pd does not exist for the buit packet. + */ +static int +pf_route_to(struct mbuf *m, struct pf_kstate *st, const struct pf_krule *r, + struct pf_rule_actions *act, struct ifnet *oifp, u_int8_t dir, + sa_family_t af, sa_family_t naf, struct pf_mtag **pf_mtag) +{ + int r_dir; + + KASSERT(m && r, ("%s: invalid parameters", __func__)); + + SDT_PROBE3(pf, ip, route_to, entry, m, r, st); + + if (st) { + r_dir = st->direction; + } else { + r_dir = r->direction; + } + + KASSERT(dir == PF_IN || dir == PF_OUT || + r_dir == PF_IN || r_dir == PF_OUT, ("%s: invalid direction", + __func__)); + + /* + * In some cases routing is not applied: + * - reply-to in forward direction + * - route-to in reverse direction + */ + if (af != naf) { + if ((act->rt == PF_REPLYTO) == (r->af != naf)) { + return (0); + } + } else if ((act->rt == PF_REPLYTO) == (r_dir == dir)) { + return (0); + } + + /* + * Bind to the correct interface if we're if-bound. + * We don't know which interface that will be until here, + * so we've inserted the state on V_pf_all. Fix that now. + */ + if (st != NULL && st->kif == V_pfi_all && + r->rule_flag & PFRULE_IFBOUND) { + if (act->rt == PF_ROUTETO) { + MPASS(af != naf); + MPASS(st->direction == PF_IN); + st->kif = act->rt_kif; + } else { /* PF_REPLYTO */ + st->kif = act->rt_kif; + st->orig_kif = oifp->if_pf_kif; + } + } + + if (act->rt_kif->pfik_ifp->if_flags & IFF_LOOPBACK) + m->m_flags |= M_SKIP_FIREWALL; + + (*pf_mtag) = pf_get_mtag(m); + if (*pf_mtag == NULL) { + return (-1); + } + pf_route_tag_to(st, *pf_mtag, act, naf); + + return (0); +} + +static int +pf_route_tag_afto(struct pf_pdesc *pd, struct pf_kstate *st, struct pf_krule *r) { + const struct nhop_object *nh = NULL; + struct ifnet *ifp = NULL; + struct ip *ip; + struct ip6_hdr *ip6; + struct sockaddr_in *gw4; + struct sockaddr_in6 *gw6; + + MPASS(st != NULL); + MPASS(pd->af != pd->naf); + + pd->pf_mtag = pf_get_mtag(pd->m); + pd->pf_mtag->flags |= PF_MTAG_FLAG_ROUTE_TO; + gw4 = (struct sockaddr_in *)(&pd->pf_mtag->dst); + gw6 = (struct sockaddr_in6 *)(&pd->pf_mtag->dst); + + switch (pd->naf) { +#ifdef INET + case AF_INET: + ip = mtod(pd->m, struct ip *); + nh = fib4_lookup(M_GETFIB(pd->m), ip->ip_dst, 0, NHR_NONE, 0); + if (nh == NULL) + return (-1); + ifp = nh->nh_ifp; + gw4->sin_family = AF_INET; + gw4->sin_len = sizeof(struct sockaddr_in); + + /* Use the gateway if needed. */ + if (nh->nh_flags & NHF_GATEWAY) { + gw4->sin_addr = nh->gw4_sa.sin_addr; + pd->pf_mtag->ro_flags |= RT_HAS_GW; + } else { + gw4->sin_addr = ip->ip_dst; + } + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + ip6 = mtod(pd->m, struct ip6_hdr *); + nh = fib6_lookup(M_GETFIB(pd->m), &ip6->ip6_dst, 0, NHR_NONE, 0); + if (nh == NULL) + return (-1); + ifp = nh->nh_ifp; + gw6->sin6_family = AF_INET6; + gw6->sin6_len = sizeof(struct sockaddr_in6); + + /* Use the gateway if needed. */ + if (nh->nh_flags & NHF_GATEWAY) { + memcpy(&(gw6->sin6_addr), &nh->gw6_sa.sin6_addr, + sizeof(struct sockaddr_in6)); + pd->pf_mtag->ro_flags |= RT_HAS_GW; + } else { + gw6->sin6_addr = ip6->ip6_dst; + } + break; +#endif /* INET6 */ + default: + unhandled_af(pd->naf); + } + + MPASS(ifp != NULL); + + /* + * Bind to the correct interface if we're if-bound on inbound af-to. + * We don't know which interface that will be until here, + * so we've inserted the state on V_pf_all. Fix that now. + */ + if (st->kif == V_pfi_all && r->rule_flag & PFRULE_IFBOUND) { + MPASS(pd->af != pd->naf); + MPASS(st->direction == PF_IN); + st->kif = ifp->if_pf_kif; + } + + pd->pf_mtag->if_index = ifp->if_index; + pd->pf_mtag->if_idxgen = ifp->if_idxgen; + + return (0); +} + /* * FreeBSD supports cksum offloads for the following drivers. * em(4), fxp(4), lge(4), nge(4), re(4), ti(4), txp(4), xl(4) @@ -9661,9 +9702,6 @@ dndir = pd->dir; } - if (pd->pf_mtag->flags & PF_MTAG_FLAG_DUMMYNETED) - return (false); - memset(dnflow, 0, sizeof(*dnflow)); if (pd->dport != NULL) @@ -9757,8 +9795,6 @@ { struct m_tag *mtag; - pf_mtag->flags &= ~PF_MTAG_FLAG_DUMMYNET; - /* dummynet adds this tag, but pf does not need it, * and keeping it creates unexpected behavior, * e.g. in case of divert(4) usage right after dummynet. */ @@ -9768,16 +9804,7 @@ } static int -pf_dummynet(struct pf_pdesc *pd, struct pf_kstate *s, - struct pf_krule *r, struct mbuf **m0) -{ - return (pf_dummynet_route(pd, s, r, NULL, NULL, m0)); -} - -static int -pf_dummynet_route(struct pf_pdesc *pd, struct pf_kstate *s, - struct pf_krule *r, struct ifnet *ifp, const struct sockaddr *sa, - struct mbuf **m0) +pf_dummynet(struct pf_pdesc *pd, struct pf_kstate *s, struct pf_krule *r) { struct ip_fw_args dnflow; @@ -9787,38 +9814,18 @@ return (0); if (ip_dn_io_ptr == NULL) { - m_freem(*m0); - *m0 = NULL; + m_freem(pd->m); + pd->m = NULL; return (ENOMEM); } if (pd->pf_mtag == NULL && - ((pd->pf_mtag = pf_get_mtag(*m0)) == NULL)) { - m_freem(*m0); - *m0 = NULL; + ((pd->pf_mtag = pf_get_mtag(pd->m)) == NULL)) { + m_freem(pd->m); + pd->m = NULL; return (ENOMEM); } - if (ifp != NULL) { - pd->pf_mtag->flags |= PF_MTAG_FLAG_ROUTE_TO; - - pd->pf_mtag->if_index = ifp->if_index; - pd->pf_mtag->if_idxgen = ifp->if_idxgen; - - MPASS(sa != NULL); - - switch (sa->sa_family) { - case AF_INET: - memcpy(&pd->pf_mtag->dst, sa, - sizeof(struct sockaddr_in)); - break; - case AF_INET6: - memcpy(&pd->pf_mtag->dst, sa, - sizeof(struct sockaddr_in6)); - break; - } - } - if (s != NULL && s->nat_rule != NULL && s->nat_rule->action == PF_RDR && ( @@ -9831,17 +9838,11 @@ * as being local. Otherwise it might get dropped * if dummynet re-injects. */ - (*m0)->m_pkthdr.rcvif = V_loif; + pd->m->m_pkthdr.rcvif = V_loif; } if (pf_pdesc_to_dnflow(pd, r, s, &dnflow)) { - pd->pf_mtag->flags |= PF_MTAG_FLAG_DUMMYNET; - pd->pf_mtag->flags |= PF_MTAG_FLAG_DUMMYNETED; - ip_dn_io_ptr(m0, &dnflow); - if (*m0 != NULL) { - pd->pf_mtag->flags &= ~PF_MTAG_FLAG_ROUTE_TO; - pf_dummynet_flag_remove(*m0, pd->pf_mtag); - } + ip_dn_io_ptr(&(pd->m), &dnflow); } return (0); @@ -10238,8 +10239,6 @@ PF_RULES_ASSERT(); TAILQ_INIT(&pd->sctp_multihome_jobs); - if (default_actions != NULL) - memcpy(&pd->act, default_actions, sizeof(pd->act)); if (pd->pf_mtag && pd->pf_mtag->dnpipe) { pd->act.dnpipe = pd->pf_mtag->dnpipe; @@ -10687,34 +10686,6 @@ pf_init_pdesc(&pd, *m0); - if (pd.pf_mtag != NULL && (pd.pf_mtag->flags & PF_MTAG_FLAG_ROUTE_TO)) { - pd.pf_mtag->flags &= ~PF_MTAG_FLAG_ROUTE_TO; - - ifp = ifnet_byindexgen(pd.pf_mtag->if_index, - pd.pf_mtag->if_idxgen); - if (ifp == NULL || ifp->if_flags & IFF_DYING) { - m_freem(*m0); - *m0 = NULL; - return (PF_PASS); - } - (ifp->if_output)(ifp, *m0, sintosa(&pd.pf_mtag->dst), NULL); - *m0 = NULL; - return (PF_PASS); - } - - if (ip_dn_io_ptr != NULL && pd.pf_mtag != NULL && - pd.pf_mtag->flags & PF_MTAG_FLAG_DUMMYNET) { - /* Dummynet re-injects packets after they've - * completed their delay. We've already - * processed them, so pass unconditionally. */ - - /* But only once. We may see the packet multiple times (e.g. - * PFIL_IN/PFIL_OUT). */ - pf_dummynet_flag_remove(pd.m, pd.pf_mtag); - - return (PF_PASS); - } - PF_RULES_RLOCK(); if (pf_setup_pdesc(af, dir, &pd, m0, &action, &reason, kif) == -1) { @@ -10822,11 +10793,12 @@ action = pf_test(af, dir, pflags, ifp, &msyn, inp); m_freem(msyn); - if (action != PF_PASS) + if (action != PF_PASS && action != PF_AFRT) break; action = pf_test_state(&s, &pd, &reason); - if (action != PF_PASS || s == NULL) { + if ((action != PF_PASS && action != PF_AFRT) || + s == NULL) { action = PF_DROP; break; } @@ -11058,62 +11030,46 @@ switch (action) { case PF_SYNPROXY_DROP: - m_freem(*m0); + m_freem(pd.m); case PF_DEFER: - *m0 = NULL; + pd.m = NULL; action = PF_PASS; - break; + goto eat_pkt; case PF_DROP: - m_freem(*m0); - *m0 = NULL; - break; + m_freem(pd.m); + pd.m = NULL; + goto eat_pkt; case PF_AFRT: - if (pf_translate_af(&pd)) { - *m0 = pd.m; + if (pf_translate_af(&pd, s)) { action = PF_DROP; break; } -#ifdef INET - if (pd.naf == AF_INET) { - action = pf_route(r, kif->pfik_ifp, s, &pd, - inp); - } -#endif /* INET */ -#ifdef INET6 - if (pd.naf == AF_INET6) { - action = pf_route6(r, kif->pfik_ifp, s, &pd, - inp); -} -#endif /* INET6 */ - *m0 = pd.m; - goto out; - break; + /* fallthrough */ default: if (pd.act.rt) { - switch (af) { -#ifdef INET - case AF_INET: - /* pf_route() returns unlocked. */ - action = pf_route(r, kif->pfik_ifp, s, &pd, - inp); + if (pd.act.rt == PF_DUPTO) + pf_dup_to(&pd, s); + else if (pf_route_to(pd.m, s, r, + s ? &(s->act) : &(pd.act), kif->pfik_ifp, + pd.dir, pd.af, pd.naf, &(pd.pf_mtag))) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - /* pf_route6() returns unlocked. */ - action = pf_route6(r, kif->pfik_ifp, s, &pd, - inp); - break; -#endif /* INET6 */ } - *m0 = pd.m; - goto out; } - if (pf_dummynet(&pd, s, r, m0) != 0) { + if (action == PF_AFRT && + (pd.pf_mtag == NULL || + !(pd.pf_mtag->flags & PF_MTAG_FLAG_ROUTE_TO)) && + pf_route_tag_afto(&pd, s, r)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_NOROUTE); + break; + } + if (pf_dummynet(&pd, s, r) != 0) { action = PF_DROP; REASON_SET(&reason, PFRES_MEMORY); + break; } - break; } eat_pkt: @@ -11129,17 +11085,61 @@ if (s) PF_STATE_UNLOCK(s); -out: + if (pd.m == NULL) + goto finish; + + /* + * For af-to packet must have passed either through + * pf_route_to or pf_route_tag_afto. + */ + KASSERT(action != PF_AFRT || (pd.pf_mtag && + pd.pf_mtag->flags & PF_MTAG_FLAG_ROUTE_TO), + ("%s: af-to requested but no PF_MTAG_FLAG_ROUTE_TO applied\n", + __func__)); + + /* + * For route-to and af-to we can't continue on the normal path of packet + * forwarding. Even though ip(6)?_(try)?route\(\) could be modified + * to deal with routing information embedded in pf_mtag, they can't + * deal with AF of the packet changing. Instead send the packet + * to custom forwarding functions. Unless it's the recreated SYN for + * syncookie operation - this must pass through pf_test() and only + * undergo inbound filtering. The function pf_synproxy() will take care + * of sending the new SYN out through outbound filtering and routing. + */ + if (pd.pf_mtag && pd.pf_mtag->flags & PF_MTAG_FLAG_ROUTE_TO && + !(pd.pf_mtag->flags & PF_MTAG_FLAG_SYNCOOKIE_RECREATED)) { + if (pd.af != pd.naf) + pf_skip_hook(pd.m, pd.naf, pd.dir); + switch (pd.naf) { +#ifdef INET + case AF_INET: + pf_forward4(&(pd.m), pd.dir); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + pf_forward6(&(pd.m), pd.dir); + break; +#endif /* INET6 */ + default: + unhandled_af(pd.naf); + } + } + #ifdef INET6 /* If reassembled packet passed, create new fragments. */ - if (af == AF_INET6 && action == PF_PASS && *m0 && dir == PF_OUT && + if (af == AF_INET6 && action == PF_PASS && pd.m && dir == PF_OUT && (! (pflags & PF_PFIL_NOREFRAGMENT)) && (mtag = m_tag_find(pd.m, PACKET_TAG_PF_REASSEMBLED, NULL)) != NULL) - action = pf_refragment6(ifp, m0, mtag, NULL, pflags & PFIL_FWD); + action = pf_refragment6(ifp, &(pd.m), mtag, NULL, + (pflags & PFIL_FWD) ? PF_REFRAGMENT6_IP6_FORWARD : PF_REFRAGMENT6_IP6_OUTPUT); #endif /* INET6 */ pf_sctp_multihome_delayed(&pd, kif, s, action); +finish: + *m0 = pd.m; return (action); } #endif /* INET || INET6 */ diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c --- a/sys/netpfil/pf/pf_ioctl.c +++ b/sys/netpfil/pf/pf_ioctl.c @@ -241,18 +241,6 @@ int flags, void *ruleset __unused, struct inpcb *inp); static pfil_return_t pf_eth_check_out(struct mbuf **m, struct ifnet *ifp, int flags, void *ruleset __unused, struct inpcb *inp); -#ifdef INET -static pfil_return_t pf_check_in(struct mbuf **m, struct ifnet *ifp, - int flags, void *ruleset __unused, struct inpcb *inp); -static pfil_return_t pf_check_out(struct mbuf **m, struct ifnet *ifp, - int flags, void *ruleset __unused, struct inpcb *inp); -#endif -#ifdef INET6 -static pfil_return_t pf_check6_in(struct mbuf **m, struct ifnet *ifp, - int flags, void *ruleset __unused, struct inpcb *inp); -static pfil_return_t pf_check6_out(struct mbuf **m, struct ifnet *ifp, - int flags, void *ruleset __unused, struct inpcb *inp); -#endif static void hook_pf_eth(void); static void hook_pf(void); @@ -6566,6 +6554,7 @@ switch (chk) { case PF_PASS: + case PF_AFRT: if (*m == NULL) return (PFIL_CONSUMED); else @@ -6831,6 +6820,36 @@ atomic_store_bool(&V_pf_pfil_hooked, false); } +void +pf_skip_hook(struct mbuf *m, sa_family_t naf, u_int8_t dir) +{ + /* + * When reinject the AF-tranlated packet to the network stack + * update MTAG_PFIL/MTAG_PFIL_NEXT_HOOK as if pf filtering + * has already happened for the new AF. + */ + switch (naf) { +#ifdef AF_INET + case AF_INET: + if (dir == PF_IN) { + pfil_mbuf_skip_hook(m, V_pf_ip4_in_hook); + } else { + pfil_mbuf_skip_hook(m, V_pf_ip4_out_hook); + } + break; +#endif /* AF_INET */ +#ifdef AF_INET6 + case AF_INET6: + if (dir == PF_IN) { + pfil_mbuf_skip_hook(m, V_pf_ip6_in_hook); + } else { + pfil_mbuf_skip_hook(m, V_pf_ip6_out_hook); + } + break; +#endif /* AF_INET6 */ + } +} + static void pf_load_vnet(void) { @@ -6873,6 +6892,9 @@ pfi_initialize(); + pf_forward4_p = pf_forward4; + pf_forward6_p = pf_forward6; + return (0); } @@ -6973,6 +6995,9 @@ pf_nl_unregister(); + pf_forward4_p = NULL; + pf_forward6_p = NULL; + if (pf_dev != NULL) destroy_dev(pf_dev); diff --git a/sys/netpfil/pf/pf_mtag.h b/sys/netpfil/pf/pf_mtag.h --- a/sys/netpfil/pf/pf_mtag.h +++ b/sys/netpfil/pf/pf_mtag.h @@ -37,11 +37,11 @@ /* pf_mtag -> flags */ #define PF_MTAG_FLAG_ROUTE_TO 0x01 -#define PF_MTAG_FLAG_DUMMYNET 0x02 +/* unused 0x02 */ #define PF_MTAG_FLAG_TRANSLATE_LOCALHOST 0x04 #define PF_MTAG_FLAG_PACKET_LOOPED 0x08 #define PF_MTAG_FLAG_FASTFWD_OURS_PRESENT 0x10 -#define PF_MTAG_FLAG_DUMMYNETED 0x20 +/* unused 0x20 */ #define PF_MTAG_FLAG_DUPLICATED 0x40 #define PF_MTAG_FLAG_SYNCOOKIE_RECREATED 0x80 @@ -54,9 +54,12 @@ u_int8_t routed; u_int16_t dnpipe; u_int32_t dnflags; - u_int16_t if_index; /* For ROUTE_TO */ - u_int16_t if_idxgen; /* For ROUTE_TO */ - struct sockaddr_storage dst; /* For ROUTE_TO */ + uint64_t state_id; + uint32_t state_creatorid; + uint16_t ro_flags; /* For pf_route_to and pf_route_afto */ + u_int16_t if_index; /* For pf_route_to and pf_route_afto */ + u_int16_t if_idxgen; /* For pf_route_to and pf_route_afto */ + struct sockaddr_storage dst; /* For pf_route_to and pf_route_afto */ }; static __inline struct pf_mtag * diff --git a/sys/netpfil/pf/pf_norm.c b/sys/netpfil/pf/pf_norm.c --- a/sys/netpfil/pf/pf_norm.c +++ b/sys/netpfil/pf/pf_norm.c @@ -1032,7 +1032,7 @@ int pf_refragment6(struct ifnet *ifp, struct mbuf **m0, struct m_tag *mtag, - struct ifnet *rt, bool forward) + struct ifnet *rt, pf_refragment6_forward_t forward) { struct mbuf *m = *m0, *t; struct ip6_hdr *hdr; @@ -1123,12 +1123,21 @@ icmp6_error(m, ICMP6_PACKET_TOO_BIG, 0, if_getmtu(ifp)); } - } else if (forward) { - MPASS(m->m_pkthdr.rcvif != NULL); - ip6_forward(m, 0); } else { - (void)ip6_output(m, NULL, NULL, 0, NULL, NULL, - NULL); + switch (forward) { + case PF_REFRAGMENT6_IP6_OUTPUT: + (void)ip6_output(m, NULL, NULL, 0, NULL, NULL, + NULL); + break; + case PF_REFRAGMENT6_IP6_FORWARD: + MPASS(m->m_pkthdr.rcvif != NULL); + ip6_forward(m, 0); + break; + case PF_REFRAGMENT6_PF_FORWARD: + MPASS(m->m_pkthdr.rcvif != NULL); + pf_forward6(&m, PF_OUT); + break; + } } } diff --git a/sys/netpfil/pf/pf_syncookies.c b/sys/netpfil/pf/pf_syncookies.c --- a/sys/netpfil/pf/pf_syncookies.c +++ b/sys/netpfil/pf/pf_syncookies.c @@ -43,7 +43,7 @@ * We leave synflood mode when the number of half-open states - including * in-flight syncookies - drops far enough again */ - + /* * syncookie enabled Initial Sequence Number: * 24 bit MAC @@ -297,7 +297,7 @@ pf_send_tcp(NULL, pd->af, pd->dst, pd->src, *pd->dport, *pd->sport, iss, ntohl(pd->hdr.tcp.th_seq) + 1, TH_SYN|TH_ACK, 0, mss, 0, M_SKIP_FIREWALL | (pd->m->m_flags & M_LOOP), 0, 0, - pd->act.rtableid); + pd->act.rtableid, NULL); counter_u64_add(V_pf_status.lcounters[KLCNT_SYNCOOKIES_SENT], 1); /* XXX Maybe only in adaptive mode? */ atomic_add_64(&V_pf_status.syncookies_inflight[V_pf_syncookie_status.oddeven], @@ -513,7 +513,7 @@ mss = pf_syncookie_msstab[cookie.flags.mss_idx]; wscale = pf_syncookie_wstab[cookie.flags.wscale_idx]; - return (pf_build_tcp(NULL, pd->af, pd->src, pd->dst, *pd->sport, + return(pf_build_tcp(NULL, pd->af, pd->src, pd->dst, *pd->sport, *pd->dport, seq, 0, TH_SYN, wscale, mss, pd->ttl, (pd->m->m_flags & M_LOOP), 0, PF_MTAG_FLAG_SYNCOOKIE_RECREATED, cookie.flags.sack_ok, pd->act.rtableid));