diff --git a/sys/net/pfil.c b/sys/net/pfil.c
--- a/sys/net/pfil.c
+++ b/sys/net/pfil.c
@@ -115,6 +115,10 @@
 static struct pfil_link *pfil_link_remove(pfil_chain_t *, pfil_hook_t );
 static void pfil_link_free(epoch_context_t);
 
+/* pf(4) functions accessible for dummynet */
+void	(*pf_forward4_p)(struct mbuf **, u_int8_t);
+void	(*pf_forward6_p)(struct mbuf **, u_int8_t);
+
 /*
  * To couple a filtering point that provides memory pointer with a filter that
  * works on mbufs only.
diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -2326,6 +2326,7 @@
 
 struct pf_mtag			*pf_get_mtag(struct mbuf *);
 
+void				 pf_skip_hook(struct mbuf *, sa_family_t, u_int8_t);
 extern void			 pf_calc_skip_steps(struct pf_krulequeue *);
 #ifdef ALTQ
 extern	void			 pf_altq_ifnet_event(struct ifnet *, int);
@@ -2461,8 +2462,13 @@
 #ifdef INET6
 int	pf_normalize_ip6(int, u_short *, struct pf_pdesc *);
 int	pf_max_frag_size(struct mbuf *);
+typedef enum {
+	PF_REFRAGMENT6_IP6_OUTPUT = 0,
+	PF_REFRAGMENT6_IP6_FORWARD,
+	PF_REFRAGMENT6_PF_FORWARD,
+} pf_refragment6_forward_t;
 int	pf_refragment6(struct ifnet *, struct mbuf **, struct m_tag *,
-	    struct ifnet *, bool);
+	    struct ifnet *, pf_refragment6_forward_t);
 #endif /* INET6 */
 
 int	pf_multihome_scan_init(int, int, struct pf_pdesc *);
@@ -2506,7 +2512,7 @@
 struct pf_state_key *pf_alloc_state_key(int);
 int	pf_translate(struct pf_pdesc *, struct pf_addr *, u_int16_t,
 	    struct pf_addr *, u_int16_t, u_int16_t, int);
-int	pf_translate_af(struct pf_pdesc *);
+int	pf_translate_af(struct pf_pdesc *, struct pf_kstate *);
 bool	pf_init_threshold(struct pf_kthreshold *, uint32_t, uint32_t);
 
 void	pfr_initialize(void);
@@ -2594,7 +2600,7 @@
 			    const struct pf_addr *, const struct pf_addr *,
 			    u_int16_t, u_int16_t, u_int32_t, u_int32_t,
 			    u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
-			    u_int16_t, u_int16_t, int);
+			    u_int16_t, u_int16_t, int, struct pf_kstate *);
 
 void			 pf_syncookies_init(void);
 void			 pf_syncookies_cleanup(void);
@@ -2749,6 +2755,14 @@
 #if defined(INET) || defined(INET6)
 void	pf_scrub(struct pf_pdesc *);
 #endif
+#ifdef INET
+extern void		 (*pf_forward4_p)(struct mbuf **, u_int8_t);
+void			 pf_forward4(struct mbuf **, u_int8_t);
+#endif /* INET */
+#ifdef INET6
+extern void		 (*pf_forward6_p)(struct mbuf **, u_int8_t);
+void			 pf_forward6(struct mbuf **, u_int8_t);
+#endif /* INET6 */
 
 struct pfi_kkif		*pf_kkif_create(int);
 void			 pf_kkif_free(struct pfi_kkif *);
diff --git a/sys/netpfil/ipfw/ip_dn_io.c b/sys/netpfil/ipfw/ip_dn_io.c
--- a/sys/netpfil/ipfw/ip_dn_io.c
+++ b/sys/netpfil/ipfw/ip_dn_io.c
@@ -30,6 +30,7 @@
  * Dummynet portions related to packet handling.
  */
 #include <sys/cdefs.h>
+#include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
@@ -51,6 +52,7 @@
 #include <net/if_var.h>	/* NET_EPOCH_... */
 #include <net/if_private.h>
 #include <net/netisr.h>
+#include <net/pfvar.h>	/* pf_forward[46]_p */
 #include <net/vnet.h>
 
 #include <netinet/in.h>
@@ -69,6 +71,7 @@
 #include <netpfil/ipfw/dn_aqm.h>
 #endif
 #include <netpfil/ipfw/dn_sched.h>
+#include <netpfil/pf/pf_mtag.h>
 
 /*
  * We keep a private variable for the simulation time, but we could
@@ -88,6 +91,10 @@
 
 extern	void (*bridge_dn_p)(struct mbuf *, struct ifnet *);
 
+/* pf functions accessible for dummynet */
+extern void	(*pf_forward4_p)(struct mbuf **, u_int8_t);
+extern void	(*pf_forward6_p)(struct mbuf **, u_int8_t);
+
 #ifdef SYSCTL_NODE
 
 /*
@@ -772,6 +779,7 @@
 dummynet_send(struct mbuf *m)
 {
 	struct mbuf *n;
+	struct pf_mtag	*pf_mtag;
 
 	NET_EPOCH_ASSERT();
 
@@ -802,22 +810,44 @@
 			}
 		}
 
+		pf_mtag = pf_find_mtag(m);
 		switch (dst) {
 		case DIR_OUT:
-			ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL);
+			if (pf_mtag &&
+			    pf_mtag->flags & PF_MTAG_FLAG_ROUTE_TO &&
+			    pf_forward4_p != NULL) {
+				pf_forward4_p(&m, PF_OUT);
+			} else {
+				ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL);
+			}
 			break ;
 
 		case DIR_IN :
-			netisr_dispatch(NETISR_IP, m);
+			if (pf_mtag &&
+			    pf_mtag->flags & PF_MTAG_FLAG_ROUTE_TO &&
+			    pf_forward4_p != NULL)
+				pf_forward4_p(&m, PF_IN);
+			else
+				netisr_dispatch(NETISR_IP, m);
 			break;
 
 #ifdef INET6
 		case DIR_IN | PROTO_IPV6:
-			netisr_dispatch(NETISR_IPV6, m);
+			if (pf_mtag &&
+			    pf_mtag->flags & PF_MTAG_FLAG_ROUTE_TO &&
+			    pf_forward6_p != NULL)
+				pf_forward6_p(&m, PF_IN);
+			else
+				netisr_dispatch(NETISR_IPV6, m);
 			break;
 
 		case DIR_OUT | PROTO_IPV6:
-			ip6_output(m, NULL, NULL, IPV6_FORWARDING, NULL, NULL, NULL);
+			if (pf_mtag &&
+			    pf_mtag->flags & PF_MTAG_FLAG_ROUTE_TO &&
+			    pf_forward6_p != NULL)
+				pf_forward6_p(&m, PF_OUT);
+			else
+				ip6_output(m, NULL, NULL, IPV6_FORWARDING, NULL, NULL, NULL);
 			break;
 #endif
 
diff --git a/sys/netpfil/pf/pf.h b/sys/netpfil/pf/pf.h
--- a/sys/netpfil/pf/pf.h
+++ b/sys/netpfil/pf/pf.h
@@ -163,7 +163,8 @@
 #define PFRES_SYNPROXY	14		/* SYN proxy */
 #define PFRES_MAPFAILED	15		/* pf_map_addr() failed */
 #define PFRES_TRANSLATE	16		/* No translation address available */
-#define PFRES_MAX	17		/* total+1 */
+#define PFRES_NOROUTE	17		/* No route for af-to */
+#define PFRES_MAX	18		/* total+1 */
 
 #define PFRES_NAMES { \
 	"match", \
@@ -183,6 +184,7 @@
 	"synproxy", \
 	"map-failed", \
 	"translate", \
+	"no-route", \
 	NULL \
 }
 
diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@@ -336,10 +336,7 @@
 static __inline void	 pf_dummynet_flag_remove(struct mbuf *m,
 			    struct pf_mtag *pf_mtag);
 static int		 pf_dummynet(struct pf_pdesc *, struct pf_kstate *,
-			    struct pf_krule *, struct mbuf **);
-static int		 pf_dummynet_route(struct pf_pdesc *,
-			    struct pf_kstate *, struct pf_krule *,
-			    struct ifnet *, const struct sockaddr *, struct mbuf **);
+			    struct pf_krule *);
 static int		 pf_test_eth_rule(int, struct pfi_kkif *,
 			    struct mbuf **);
 static int		 pf_test_rule(struct pf_krule **, struct pf_kstate **,
@@ -406,19 +403,13 @@
 static void		 pf_purge_unlinked_rules(void);
 static int		 pf_mtag_uminit(void *, int, int);
 static void		 pf_mtag_free(struct m_tag *);
-static void		 pf_packet_rework_nat(struct pf_pdesc *, int,
-			    struct pf_state_key *);
-#ifdef INET
-static int		 pf_route(struct pf_krule *,
-			    struct ifnet *, struct pf_kstate *,
-			    struct pf_pdesc *, struct inpcb *);
-#endif /* INET */
+static int		 pf_route_to(struct mbuf *, struct pf_kstate *,
+			    const struct pf_krule *, struct pf_rule_actions *,
+			    struct ifnet *, u_int8_t, sa_family_t ,
+			    sa_family_t, struct pf_mtag **);
 #ifdef INET6
 static void		 pf_change_a6(struct pf_addr *, u_int16_t *,
 			    struct pf_addr *, u_int8_t);
-static int		 pf_route6(struct pf_krule *,
-			    struct ifnet *, struct pf_kstate *,
-			    struct pf_pdesc *, struct inpcb *);
 #endif /* INET6 */
 static __inline void pf_set_protostate(struct pf_kstate *, int, u_int8_t);
 
@@ -429,16 +420,6 @@
 
 VNET_DEFINE(struct pf_limit, pf_limits[PF_LIMIT_MAX]);
 
-#define	PACKET_UNDO_NAT(_pd, _off, _s)					\
-	do {								\
-		struct pf_state_key *nk;				\
-		if ((pd->dir) == PF_OUT)				\
-			nk = (_s)->key[PF_SK_STACK];			\
-		else							\
-			nk = (_s)->key[PF_SK_WIRE];			\
-		pf_packet_rework_nat(_pd, _off, nk);		\
-	} while (0)
-
 #define	PACKET_LOOPED(pd)	((pd)->pf_mtag &&			\
 				 (pd)->pf_mtag->flags & PF_MTAG_FLAG_PACKET_LOOPED)
 
@@ -617,96 +598,185 @@
 	}
 }
 
-static void
-pf_packet_rework_nat(struct pf_pdesc *pd, int off, struct pf_state_key *nk)
+static int
+pf_undo_nat_state(struct mbuf *m, sa_family_t af, struct pf_kstate *st,
+    u_int8_t dir)
 {
+	struct pf_pdesc		 pd;
+	struct pf_state_key	*nk;
+	u_short			 action;
+	u_short			 reason;
 
-	switch (pd->virtual_proto) {
+	memset(&pd, 0, sizeof(pd));
+
+	/* Set up a minimal pd for pf_change_ap */
+	pd.dir = dir;
+	pd.m = m;
+	pd.sidx = (dir == PF_IN) ? 0 : 1;
+	pd.didx = (dir == PF_IN) ? 1 : 0;
+	pd.af = pd.naf = af;
+
+	if (dir == PF_OUT)
+		nk = st->key[PF_SK_STACK];
+	else
+		nk = st->key[PF_SK_WIRE];
+
+	MPASS(nk != NULL);
+
+	switch (af) {
+#ifdef INET
+	case AF_INET: {
+		struct ip *h;
+		h = mtod(pd.m, struct ip *);
+		if (pf_walk_header(&pd, h, &reason) != PF_PASS) {
+			printf("%s: pf_walk_header failed for IPv4\n", __func__);
+			return (-1);
+		}
+		pd.src = (struct pf_addr *)&h->ip_src;
+		pd.dst = (struct pf_addr *)&h->ip_dst;
+		pd.ip_sum = &h->ip_sum;
+		pd.tot_len = ntohs(h->ip_len);
+		pd.df = h->ip_off & htons(IP_DF);
+		pd.virtual_proto = (h->ip_off & htons(IP_MF | IP_OFFMASK)) ?
+		    PF_VPROTO_FRAGMENT : pd.proto;
+		}
+		break;
+#endif /* INET */
+#ifdef INET6
+	case AF_INET6: {
+		struct ip6_hdr *h;
+		h = mtod(pd.m, struct ip6_hdr *);
+		if (pf_walk_header6(&pd, h, &reason) != PF_PASS) {
+			printf("%s: pf_walk_header failed for IPv6\n", __func__);
+			return (-1);
+		}
+		pd.src = (struct pf_addr *)&h->ip6_src;
+		pd.dst = (struct pf_addr *)&h->ip6_dst;
+		pd.ip_sum = NULL;
+		pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
+		pd.virtual_proto = (pd.fragoff != 0) ?
+		    PF_VPROTO_FRAGMENT : pd.proto;
+		}
+		break;
+#endif /* INET6 */
+	}
+
+	switch (pd.virtual_proto) {
 	case IPPROTO_TCP: {
-		struct tcphdr *th = &pd->hdr.tcp;
+		struct tcphdr *th = &pd.hdr.tcp;
+		if (!pf_pull_hdr(pd.m, pd.off, th, sizeof(*th), &action,
+		    &reason, af)) {
+			printf("%s: pf_pull_hdr failed for TCP\n", __func__);
+			return (-1);
+		}
 
-		if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af))
-			pf_change_ap(pd, pd->src, &th->th_sport,
-			    &nk->addr[pd->sidx], nk->port[pd->sidx]);
-		if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af))
-			pf_change_ap(pd, pd->dst, &th->th_dport,
-			    &nk->addr[pd->didx], nk->port[pd->didx]);
-		m_copyback(pd->m, off, sizeof(*th), (caddr_t)th);
+		if (PF_ANEQ(pd.src, &nk->addr[pd.sidx], pd.af))
+			pf_change_ap(&pd, pd.src, &th->th_sport,
+			    &nk->addr[pd.sidx], nk->port[pd.sidx]);
+		if (PF_ANEQ(pd.dst, &nk->addr[pd.didx], pd.af))
+			pf_change_ap(&pd, pd.dst, &th->th_dport,
+			    &nk->addr[pd.didx], nk->port[pd.didx]);
+		m_copyback(pd.m, pd.off, sizeof(*th), (caddr_t)th);
 		break;
 	}
 	case IPPROTO_UDP: {
-		struct udphdr *uh = &pd->hdr.udp;
+		struct udphdr *uh = &pd.hdr.udp;
+		if (!pf_pull_hdr(pd.m, pd.off, uh, sizeof(*uh), &action,
+		    &reason, af)) {
+			printf("%s: pf_pull_hdr failed for UDP\n", __func__);
+			return (-1);
+		}
 
-		if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af))
-			pf_change_ap(pd, pd->src, &uh->uh_sport,
-			    &nk->addr[pd->sidx], nk->port[pd->sidx]);
-		if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af))
-			pf_change_ap(pd, pd->dst, &uh->uh_dport,
-			    &nk->addr[pd->didx], nk->port[pd->didx]);
-		m_copyback(pd->m, off, sizeof(*uh), (caddr_t)uh);
+		if (PF_ANEQ(pd.src, &nk->addr[pd.sidx], pd.af))
+			pf_change_ap(&pd, pd.src, &uh->uh_sport,
+			    &nk->addr[pd.sidx], nk->port[pd.sidx]);
+		if (PF_ANEQ(pd.dst, &nk->addr[pd.didx], pd.af))
+			pf_change_ap(&pd, pd.dst, &uh->uh_dport,
+			    &nk->addr[pd.didx], nk->port[pd.didx]);
+		m_copyback(pd.m, pd.off, sizeof(*uh), (caddr_t)uh);
 		break;
 	}
 	case IPPROTO_SCTP: {
-		struct sctphdr *sh = &pd->hdr.sctp;
-
-		if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af)) {
-			pf_change_ap(pd, pd->src, &sh->src_port,
-			    &nk->addr[pd->sidx], nk->port[pd->sidx]);
+		struct sctphdr *sh = &pd.hdr.sctp;
+		if (!pf_pull_hdr(pd.m, pd.off, sh, sizeof(*sh),
+		    &action, &reason, af)) {
+			printf("%s: pf_pull_hdr failed for SCTP\n", __func__);
+			return (-1);
 		}
-		if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af)) {
-			pf_change_ap(pd, pd->dst, &sh->dest_port,
-			    &nk->addr[pd->didx], nk->port[pd->didx]);
+
+		if (PF_ANEQ(pd.src, &nk->addr[pd.sidx], pd.af)) {
+			pf_change_ap(&pd, pd.src, &sh->src_port,
+			    &nk->addr[pd.sidx], nk->port[pd.sidx]);
+		}
+		if (PF_ANEQ(pd.dst, &nk->addr[pd.didx], pd.af)) {
+			pf_change_ap(&pd, pd.dst, &sh->dest_port,
+			    &nk->addr[pd.didx], nk->port[pd.didx]);
 		}
 
 		break;
 	}
 	case IPPROTO_ICMP: {
-		struct icmp *ih = &pd->hdr.icmp;
+		struct icmp *ih = &pd.hdr.icmp;
+		if (!pf_pull_hdr(pd.m, pd.off, ih, ICMP_MINLEN,
+		    &action, &reason, af)) {
+			printf("%s: pf_pull_hdr failed for ICMP\n", __func__);
+			return (-1);
+		}
 
-		if (nk->port[pd->sidx] != ih->icmp_id) {
-			pd->hdr.icmp.icmp_cksum = pf_cksum_fixup(
+		if (nk->port[pd.sidx] != ih->icmp_id) {
+			pd.hdr.icmp.icmp_cksum = pf_cksum_fixup(
 			    ih->icmp_cksum, ih->icmp_id,
-			    nk->port[pd->sidx], 0);
-			ih->icmp_id = nk->port[pd->sidx];
-			pd->sport = &ih->icmp_id;
+			    nk->port[pd.sidx], 0);
+			ih->icmp_id = nk->port[pd.sidx];
+			pd.sport = &ih->icmp_id;
 
-			m_copyback(pd->m, off, ICMP_MINLEN, (caddr_t)ih);
+			m_copyback(pd.m, pd.off, ICMP_MINLEN, (caddr_t)ih);
 		}
 		/* FALLTHROUGH */
 	}
 	default:
-		if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af)) {
-			switch (pd->af) {
+		if (PF_ANEQ(pd.src, &nk->addr[pd.sidx], pd.af)) {
+			switch (pd.af) {
+#ifdef INET
 			case AF_INET:
-				pf_change_a(&pd->src->v4.s_addr,
-				    pd->ip_sum, nk->addr[pd->sidx].v4.s_addr,
+				pf_change_a(&pd.src->v4.s_addr,
+				    pd.ip_sum, nk->addr[pd.sidx].v4.s_addr,
 				    0);
 				break;
+#endif /* INET */
+#ifdef INET6
 			case AF_INET6:
-				pf_addrcpy(pd->src, &nk->addr[pd->sidx],
-				    pd->af);
+				pf_addrcpy(pd.src, &nk->addr[pd.sidx],
+				    pd.af);
 				break;
+#endif /* INET6 */
 			default:
-				unhandled_af(pd->af);
+				unhandled_af(pd.af);
 			}
 		}
-		if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af)) {
-			switch (pd->af) {
+		if (PF_ANEQ(pd.dst, &nk->addr[pd.didx], pd.af)) {
+			switch (pd.af) {
+#ifdef INET
 			case AF_INET:
-				pf_change_a(&pd->dst->v4.s_addr,
-				    pd->ip_sum, nk->addr[pd->didx].v4.s_addr,
+				pf_change_a(&pd.dst->v4.s_addr,
+				    pd.ip_sum, nk->addr[pd.didx].v4.s_addr,
 				    0);
 				break;
+#endif /* INET */
+#ifdef INET6
 			case AF_INET6:
-				pf_addrcpy(pd->dst, &nk->addr[pd->didx],
-				    pd->af);
+				pf_addrcpy(pd.dst, &nk->addr[pd.didx],
+				    pd.af);
 				break;
+#endif /* INET6 */
 			default:
-				unhandled_af(pd->af);
+				unhandled_af(pd.af);
 			}
 		}
 		break;
 	}
+
+	return (0);
 }
 
 static __inline uint32_t
@@ -2401,9 +2471,10 @@
 void
 pf_intr(void *v)
 {
-	struct epoch_tracker et;
-	struct pf_send_head queue;
-	struct pf_send_entry *pfse, *next;
+	struct epoch_tracker	 et;
+	struct pf_send_head	 queue;
+	struct pf_send_entry	*pfse, *next;
+	struct pf_mtag		*pf_mtag;
 
 	CURVNET_SET((struct vnet *)v);
 
@@ -2415,6 +2486,7 @@
 	NET_EPOCH_ENTER(et);
 
 	STAILQ_FOREACH_SAFE(pfse, &queue, pfse_next, next) {
+		pf_mtag = pf_find_mtag(pfse->pfse_m);
 		switch (pfse->pfse_type) {
 #ifdef INET
 		case PFSE_IP: {
@@ -2429,8 +2501,12 @@
 				pfse->pfse_m->m_pkthdr.csum_data = 0xffff;
 				ip_input(pfse->pfse_m);
 			} else {
-				ip_output(pfse->pfse_m, NULL, NULL, 0, NULL,
-				    NULL);
+				if (pf_mtag &&
+				    pf_mtag->flags & PF_MTAG_FLAG_ROUTE_TO)
+					pf_forward4(&(pfse->pfse_m), PF_OUT);
+				else
+					ip_output(pfse->pfse_m, NULL, NULL, 0,
+					    NULL, NULL);
 			}
 			break;
 		}
@@ -2452,8 +2528,12 @@
 				pfse->pfse_m->m_pkthdr.csum_data = 0xffff;
 				ip6_input(pfse->pfse_m);
 			} else {
-				ip6_output(pfse->pfse_m, NULL, NULL, 0, NULL,
-				    NULL, NULL);
+				if (pf_mtag &&
+				    pf_mtag->flags & PF_MTAG_FLAG_ROUTE_TO)
+					pf_forward6(&(pfse->pfse_m), PF_OUT);
+				else
+					ip6_output(pfse->pfse_m, NULL, NULL, 0,
+					    NULL, NULL, NULL);
 			}
 			break;
 		case PFSE_ICMP6:
@@ -2793,7 +2873,7 @@
 		    s->key[PF_SK_WIRE]->port[0],
 		    s->src.seqhi, s->src.seqlo + 1,
 		    TH_RST|TH_ACK, 0, 0, 0, M_SKIP_FIREWALL, s->tag, 0,
-		    s->act.rtableid);
+		    s->act.rtableid, NULL);
 	}
 
 	LIST_REMOVE(s, entry);
@@ -3555,7 +3635,7 @@
 }
 
 int
-pf_translate_af(struct pf_pdesc *pd)
+pf_translate_af(struct pf_pdesc *pd, struct pf_kstate *st)
 {
 #if defined(INET) && defined(INET6)
 	struct mbuf		*mp;
@@ -3627,6 +3707,21 @@
 		return (-1);
 	}
 
+	/*
+	 * Af-to performed on `pass in` rules is problematic.
+	 * If an IP address of the outbound interface is used for SNAT
+	 * it would be impossible to return such traffic back through normal
+	 * post-af-to routing. To prevent such issues af-to on inbound rules
+	 * creates a state spanning both inbound and outbound interfaces
+	 * and bypasses outbound filtering. This is the original OpenBSD
+	 * implementation. FreeBSD also supports af-to on `pass out` rules;
+	 * there such tricks are unnecessary, but additional routing to guide
+	 * pre-af-to packets towards the outbound interface must be installed.
+	 */
+	if (st->direction == PF_IN) {
+		pd->m->m_flags |= M_SKIP_FIREWALL;
+	}
+
 	/* recalculate icmp/icmp6 checksums */
 	if (pd->proto == IPPROTO_ICMP || pd->proto == IPPROTO_ICMPV6) {
 		int off;
@@ -4028,6 +4123,7 @@
 	struct tcphdr	*th;
 	char		*opt;
 	struct pf_mtag  *pf_mtag;
+	struct m_tag	*pfil_mtag;
 
 	len = 0;
 	th = NULL;
@@ -4069,6 +4165,18 @@
 	pf_mtag->tag = mtag_tag;
 	pf_mtag->flags = mtag_flags;
 
+	/*
+	 * The recreated mbuf must behave as if it has been through pfil
+	 * loop protection.
+	 */
+	pfil_mtag = m_tag_alloc(MTAG_PFIL, MTAG_PFIL_NEXT_HOOK, sizeof(void*),
+	    M_ZERO | M_NOWAIT);
+	if (pfil_mtag == NULL) {
+		m_freem(m);
+		return (NULL);
+	}
+	m_tag_prepend(m, pfil_mtag);
+
 	if (rtableid >= 0)
 		M_SETFIB(m, rtableid);
 
@@ -4281,13 +4389,16 @@
     const struct pf_addr *saddr, const struct pf_addr *daddr,
     u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
     u_int8_t tcp_flags, u_int16_t win, u_int16_t mss, u_int8_t ttl,
-    int mbuf_flags, u_int16_t mtag_tag, u_int16_t mtag_flags, int rtableid)
+    int mbuf_flags, u_int16_t mtag_tag, u_int16_t mtag_flags, int rtableid,
+    struct pf_kstate *st)
 {
-	struct pf_send_entry *pfse;
-	struct mbuf	*m;
+	struct pf_send_entry	*pfse;
+	struct mbuf		*m;
+	struct pf_mtag		*pf_mtag;
 
 	m = pf_build_tcp(r, af, saddr, daddr, sport, dport, seq, ack, tcp_flags,
 	    win, mss, ttl, mbuf_flags, mtag_tag, mtag_flags, 0, rtableid);
+
 	if (m == NULL)
 		return;
 
@@ -4298,6 +4409,22 @@
 		return;
 	}
 
+	/*
+	 * State is given for sending out packets created by synproxy.
+	 * We are sending packets recreated by pf, pretending to be packets
+	 * coming from the source, so treat them as if they are incoming
+	 * and already filtered.
+	 */
+	if (st != NULL && (st->act.rt == PF_ROUTETO || st->act.rt == PF_REPLYTO)) {
+		if (pf_route_to(m, st, r, &(st->act), NULL, st->direction,
+		    af, af, &pf_mtag)) {
+			m_freem(m);
+			return;
+		}
+		if (st->direction == PF_OUT)
+			pf_skip_hook(m, af, st->direction);
+	}
+
 	switch (af) {
 #ifdef INET
 	case AF_INET:
@@ -4358,7 +4485,8 @@
 			pf_send_tcp(r, pd->af, pd->dst,
 				pd->src, th->th_dport, th->th_sport,
 				ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
-				r->return_ttl, M_SKIP_FIREWALL, 0, 0, rtableid);
+				r->return_ttl, M_SKIP_FIREWALL, 0, 0, rtableid,
+				NULL);
 		}
 	} else if (pd->proto == IPPROTO_SCTP &&
 	    (r->rule_flag & PFRULE_RETURN)) {
@@ -4423,7 +4551,7 @@
 	pf_send_tcp(s->rule, pd->af, pd->dst, pd->src,
 	    pd->hdr.tcp.th_dport, pd->hdr.tcp.th_sport, dst->seqlo,
 	    src->seqlo, TH_ACK, 0, 0, s->rule->return_ttl, 0, 0, 0,
-	    s->rule->rtableid);
+	    s->rule->rtableid, s);
 }
 
 static void
@@ -4471,7 +4599,7 @@
 		M_SETFIB(m0, rtableid);
 
 #ifdef ALTQ
-	if (r->qid) {
+	if (r && r->qid) {
 		pf_mtag->qid = r->qid;
 		/* add hints for ecn */
 		pf_mtag->hdr = mtod(m0, struct ip *);
@@ -5244,17 +5372,6 @@
 	SDT_PROBE3(pf, eth, test_rule, entry, dir, kif->pfik_ifp, m);
 
 	mtag = pf_find_mtag(m);
-	if (mtag != NULL && mtag->flags & PF_MTAG_FLAG_DUMMYNET) {
-		/* Dummynet re-injects packets after they've
-		 * completed their delay. We've already
-		 * processed them, so pass unconditionally. */
-
-		/* But only once. We may see the packet multiple times (e.g.
-		 * PFIL_IN/PFIL_OUT). */
-		pf_dummynet_flag_remove(m, mtag);
-
-		return (PF_PASS);
-	}
 
 	if (__predict_false(m->m_len < sizeof(struct ether_header)) &&
 	    (m = *m0 = m_pullup(*m0, sizeof(struct ether_header))) == NULL) {
@@ -5472,8 +5589,6 @@
 		}
 
 		PF_RULES_RUNLOCK();
-
-		mtag->flags |= PF_MTAG_FLAG_DUMMYNET;
 		ip_dn_io_ptr(m0, &dnflow);
 		if (*m0 != NULL)
 			pf_dummynet_flag_remove(m, mtag);
@@ -6253,7 +6368,7 @@
 		pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport,
 		    th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
 		    TH_SYN|TH_ACK, 0, s->src.mss, 0, M_SKIP_FIREWALL, 0, 0,
-		    pd->act.rtableid);
+		    pd->act.rtableid, NULL);
 		REASON_SET(&ctx->reason, PFRES_SYNPROXY);
 		return (PF_SYNPROXY_DROP);
 	}
@@ -6843,7 +6958,7 @@
 				    th->th_sport, ntohl(th->th_ack), 0,
 				    TH_RST, 0, 0,
 				    state->rule->return_ttl, M_SKIP_FIREWALL,
-				    0, 0, state->act.rtableid);
+				    0, 0, state->act.rtableid, NULL);
 			src->seqlo = 0;
 			src->seqhi = 1;
 			src->max_win = 1;
@@ -6968,7 +7083,7 @@
 			    pd->src, th->th_dport, th->th_sport,
 			    state->src.seqhi, ntohl(th->th_seq) + 1,
 			    TH_SYN|TH_ACK, 0, state->src.mss, 0,
-			    M_SKIP_FIREWALL, 0, 0, state->act.rtableid);
+			    M_SKIP_FIREWALL, 0, 0, state->act.rtableid, NULL);
 			REASON_SET(reason, PFRES_SYNPROXY);
 			return (PF_SYNPROXY_DROP);
 		} else if ((tcp_get_flags(th) & (TH_ACK|TH_RST|TH_FIN)) != TH_ACK ||
@@ -7001,7 +7116,7 @@
 			    state->dst.seqhi, 0, TH_SYN, 0,
 			    state->src.mss, 0,
 			    state->orig_kif->pfik_ifp == V_loif ? M_LOOP : 0,
-			    state->tag, 0, state->act.rtableid);
+			    state->tag, 0, state->act.rtableid, state);
 			REASON_SET(reason, PFRES_SYNPROXY);
 			return (PF_SYNPROXY_DROP);
 		} else if (((tcp_get_flags(th) & (TH_SYN|TH_ACK)) !=
@@ -7016,13 +7131,13 @@
 			    pd->src, th->th_dport, th->th_sport,
 			    ntohl(th->th_ack), ntohl(th->th_seq) + 1,
 			    TH_ACK, state->src.max_win, 0, 0, 0,
-			    state->tag, 0, state->act.rtableid);
+			    state->tag, 0, state->act.rtableid, state);
 			pf_send_tcp(state->rule, pd->af,
 			    &sk->addr[pd->sidx], &sk->addr[pd->didx],
 			    sk->port[pd->sidx], sk->port[pd->didx],
 			    state->src.seqhi + 1, state->src.seqlo + 1,
 			    TH_ACK, state->dst.max_win, 0, 0,
-			    M_SKIP_FIREWALL, 0, 0, state->act.rtableid);
+			    M_SKIP_FIREWALL, 0, 0, state->act.rtableid, NULL);
 			state->src.seqdiff = state->dst.seqhi -
 			    state->src.seqlo;
 			state->dst.seqdiff = state->src.seqhi -
@@ -8928,232 +9043,97 @@
 	return (0);
 }
 
+/*
+ * Pf equivalent of ip_forward() and ip_output(). Used only in route-to
+ * and af-to cases, either directly or when reinjecting from dummynet.
+ */
 #ifdef INET
-static int
-pf_route(struct pf_krule *r, struct ifnet *oifp,
-    struct pf_kstate *s, struct pf_pdesc *pd, struct inpcb *inp)
+ void
+pf_forward4(struct mbuf **m, u_int8_t dir)
 {
-	struct mbuf		*m0, *m1, *md;
 	struct route		 ro;
-	const struct sockaddr	*gw = &ro.ro_dst;
-	struct sockaddr_in	*dst;
+	struct mbuf		*m0;
+	struct ifnet		*ifp;
 	struct ip		*ip;
-	struct ifnet		*ifp = NULL;
+	struct sockaddr		*gw = &ro.ro_dst;
+	struct pf_mtag		*pf_mtag;
+	struct pf_kstate	*st;
 	int			 error = 0;
 	uint16_t		 ip_len, ip_off;
-	uint16_t		 tmp;
-	int			 r_dir;
-	bool			 skip_test = false;
-	int			 action = PF_PASS;
 
-	KASSERT(pd->m && r && oifp, ("%s: invalid parameters", __func__));
+	NET_EPOCH_ASSERT();
 
-	SDT_PROBE4(pf, ip, route_to, entry, pd->m, pd, s, oifp);
+#ifdef INVARIANTS
+	pf_mtag = pf_find_mtag(*m);
+#endif /* INVARIANTS */
+	MPASS(pf_mtag != NULL);
+	KASSERT(pf_mtag->flags & PF_MTAG_FLAG_ROUTE_TO, ("Called for non-route output"));
 
-	if (s) {
-		r_dir = s->direction;
-	} else {
-		r_dir = r->direction;
+	/* Routing for packets tagged in direction PF_IN */
+	pf_mtag = pf_find_mtag(*m);
+	ifp = ifnet_byindexgen(pf_mtag->if_index, pf_mtag->if_idxgen);
+	if (ifp == NULL || ifp->if_flags & IFF_DYING) {
+		goto bad;
 	}
+	if (ifp->if_flags & IFF_LOOPBACK)
+		(*m)->m_flags |= M_SKIP_FIREWALL;
 
-	KASSERT(pd->dir == PF_IN || pd->dir == PF_OUT ||
-	    r_dir == PF_IN || r_dir == PF_OUT, ("%s: invalid direction",
-	    __func__));
-
-	if ((pd->pf_mtag == NULL &&
-	    ((pd->pf_mtag = pf_get_mtag(pd->m)) == NULL)) ||
-	    pd->pf_mtag->routed++ > 3) {
-		m0 = pd->m;
-		pd->m = NULL;
-		SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
-		action = PF_DROP;
-		goto bad_locked;
-	}
-
-	if (pd->act.rt_kif != NULL)
-		ifp = pd->act.rt_kif->pfik_ifp;
-
-	if (pd->act.rt == PF_DUPTO) {
-		if ((pd->pf_mtag->flags & PF_MTAG_FLAG_DUPLICATED)) {
-			if (s != NULL) {
-				PF_STATE_UNLOCK(s);
-			}
-			if (ifp == oifp) {
-				/* When the 2nd interface is not skipped */
-				return (action);
-			} else {
-				m0 = pd->m;
-				pd->m = NULL;
-				SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
-				action = PF_DROP;
-				goto bad;
-			}
-		} else {
-			pd->pf_mtag->flags |= PF_MTAG_FLAG_DUPLICATED;
-			if (((m0 = m_dup(pd->m, M_NOWAIT)) == NULL)) {
-				if (s)
-					PF_STATE_UNLOCK(s);
-				return (action);
-			}
+	/*
+	 * Continue with inbound pfil hooks.
+	 * pf_test() won't be repeated thanks to the MTAG_PFIL_NEXT_HOOK tag.
+	 */
+	if (dir == PF_IN) {
+		if (pfil_mbuf_in(V_inet_pfil_head, m,
+		    (*m)->m_pkthdr.rcvif, NULL) != PFIL_PASS) {
+			return;
 		}
-	} else {
-		if ((pd->act.rt == PF_REPLYTO) == (r_dir == pd->dir)) {
-			if (pd->af == pd->naf) {
-				pf_dummynet(pd, s, r, &pd->m);
-				if (s)
-					PF_STATE_UNLOCK(s);
-				return (action);
-			} else {
-				if (r_dir == PF_IN) {
-					skip_test = true;
-				}
-			}
-		}
-
-		/*
-		 * If we're actually doing route-to and af-to and are in the
-		 * reply direction.
-		 */
-		if (pd->act.rt_kif && pd->act.rt_kif->pfik_ifp &&
-		    pd->af != pd->naf) {
-			if (pd->act.rt == PF_ROUTETO && r->naf != AF_INET) {
-				/* Un-set ifp so we do a plain route lookup. */
-				ifp = NULL;
-			}
-			if (pd->act.rt == PF_REPLYTO && r->naf != AF_INET6) {
-				/* Un-set ifp so we do a plain route lookup. */
-				ifp = NULL;
-			}
-		}
-		m0 = pd->m;
-	}
-
-	ip = mtod(m0, struct ip *);
-
-	bzero(&ro, sizeof(ro));
-	dst = (struct sockaddr_in *)&ro.ro_dst;
-	dst->sin_family = AF_INET;
-	dst->sin_len = sizeof(struct sockaddr_in);
-	dst->sin_addr.s_addr = pd->act.rt_addr.v4.s_addr;
-
-	if (pd->dir == PF_IN) {
+		ip = mtod(*m, struct ip *);
 		if (ip->ip_ttl <= IPTTLDEC) {
-			if (r->rt != PF_DUPTO)
-				pf_send_icmp(m0, ICMP_TIMXCEED,
-				    ICMP_TIMXCEED_INTRANS, 0, pd->af, r,
-				    pd->act.rtableid);
-			action = PF_DROP;
-			goto bad_locked;
+			pf_send_icmp(*m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS,
+			    0, AF_INET, NULL, M_GETFIB(*m));
+			goto bad;
 		}
 		ip->ip_ttl -= IPTTLDEC;
 	}
 
-	if (s != NULL) {
-		if (ifp == NULL && (pd->af != pd->naf)) {
-			/* We're in the AFTO case. Do a route lookup. */
-			const struct nhop_object *nh;
-			nh = fib4_lookup(M_GETFIB(m0), ip->ip_dst, 0, NHR_NONE, 0);
-			if (nh) {
-				ifp = nh->nh_ifp;
-
-				/* Use the gateway if needed. */
-				if (nh->nh_flags & NHF_GATEWAY) {
-					gw = &nh->gw_sa;
-					ro.ro_flags |= RT_HAS_GW;
-				} else {
-					dst->sin_addr = ip->ip_dst;
-				}
-
-				/*
-				 * Bind to the correct interface if we're
-				 * if-bound. We don't know which interface
-				 * that will be until here, so we've inserted
-				 * the state on V_pf_all. Fix that now.
-				 */
-				if (s->kif == V_pfi_all && ifp != NULL &&
-				    r->rule_flag & PFRULE_IFBOUND)
-					s->kif = ifp->if_pf_kif;
-			}
-		}
-
-		if (r->rule_flag & PFRULE_IFBOUND &&
-		    pd->act.rt == PF_REPLYTO &&
-		    s->kif == V_pfi_all) {
-			s->kif = pd->act.rt_kif;
-			s->orig_kif = oifp->if_pf_kif;
-		}
-
-		PF_STATE_UNLOCK(s);
+	/*
+	 * Forward through outbound pfil hooks. This will cover outbound
+	 * filtering by pf. pf_test() called by the outbound pfil hook will
+	 * call pf_forward4() on its own and send the packet out. Unless
+	 * there is "set skip" on the outbound interface or the mbuf is tagged
+	 * with M_SKIP_FIREWALL, then the inbound pf_forward4() will send
+	 * the packet out.
+	 */
+	if (pfil_mbuf_out(V_inet_pfil_head, m, ifp, NULL) != PFIL_PASS) {
+		return;
 	}
 
-	if (ifp == NULL) {
-		m0 = pd->m;
-		pd->m = NULL;
-		action = PF_DROP;
-		SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
+	/* Routing for packets tagged in direction PF_OUT */
+	pf_mtag = pf_find_mtag(*m);
+	ifp = ifnet_byindexgen(pf_mtag->if_index, pf_mtag->if_idxgen);
+	if (ifp == NULL || ifp->if_flags & IFF_DYING) {
 		goto bad;
 	}
-
-	if (r->rt == PF_DUPTO)
-		skip_test = true;
-
-	if (pd->dir == PF_IN && !skip_test) {
-		if (pf_test(AF_INET, PF_OUT, PFIL_FWD, ifp, &m0, inp,
-		    &pd->act) != PF_PASS) {
-			action = PF_DROP;
-			SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
-			goto bad;
-		} else if (m0 == NULL) {
-			action = PF_DROP;
-			SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
-			goto done;
-		}
-		if (m0->m_len < sizeof(struct ip)) {
-			DPFPRINTF(PF_DEBUG_URGENT,
-			    "%s: m0->m_len < sizeof(struct ip)", __func__);
-			SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
-			action = PF_DROP;
-			goto bad;
-		}
-		ip = mtod(m0, struct ip *);
-	}
-
 	if (ifp->if_flags & IFF_LOOPBACK)
-		m0->m_flags |= M_SKIP_FIREWALL;
+		(*m)->m_flags |= M_SKIP_FIREWALL;
 
+	bzero(&ro, sizeof(ro));
+	memcpy(gw, &(pf_mtag->dst), sizeof(struct sockaddr_in));
+	ro.ro_flags = pf_mtag->ro_flags;
+
+	ip = mtod(*m, struct ip *);
 	ip_len = ntohs(ip->ip_len);
 	ip_off = ntohs(ip->ip_off);
 
 	/* Copied from FreeBSD 10.0-CURRENT ip_output. */
-	m0->m_pkthdr.csum_flags |= CSUM_IP;
-	if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) {
-		in_delayed_cksum(m0);
-		m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
+	(*m)->m_pkthdr.csum_flags |= CSUM_IP;
+	if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) {
+		in_delayed_cksum(*m);
+		(*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
 	}
-	if (m0->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) {
-		pf_sctp_checksum(m0, (uint32_t)(ip->ip_hl << 2));
-		m0->m_pkthdr.csum_flags &= ~CSUM_SCTP;
-	}
-
-	if (pd->dir == PF_IN) {
-		/*
-		 * Make sure dummynet gets the correct direction, in case it needs to
-		 * re-inject later.
-		 */
-		pd->dir = PF_OUT;
-
-		/*
-		 * The following processing is actually the rest of the inbound processing, even
-		 * though we've marked it as outbound (so we don't look through dummynet) and it
-		 * happens after the outbound processing (pf_test(PF_OUT) above).
-		 * Swap the dummynet pipe numbers, because it's going to come to the wrong
-		 * conclusion about what direction it's processing, and we can't fix it or it
-		 * will re-inject incorrectly. Swapping the pipe numbers means that its incorrect
-		 * decision will pick the right pipe, and everything will mostly work as expected.
-		 */
-		tmp = pd->act.dnrpipe;
-		pd->act.dnrpipe = pd->act.dnpipe;
-		pd->act.dnpipe = tmp;
+	if ((*m)->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) {
+		pf_sctp_checksum((*m), (uint32_t)(ip->ip_hl << 2));
+		(*m)->m_pkthdr.csum_flags &= ~CSUM_SCTP;
 	}
 
 	/*
@@ -9161,371 +9141,432 @@
 	 * care of the fragmentation for us, we can just send directly.
 	 */
 	if (ip_len <= ifp->if_mtu ||
-	    (m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0) {
+	    ((*m)->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0) {
 		ip->ip_sum = 0;
-		if (m0->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) {
-			ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
-			m0->m_pkthdr.csum_flags &= ~CSUM_IP;
-		}
-		m_clrprotoflags(m0);	/* Avoid confusing lower layers. */
-
-		md = m0;
-		error = pf_dummynet_route(pd, s, r, ifp, gw, &md);
-		if (md != NULL) {
-			error = (*ifp->if_output)(ifp, md, gw, &ro);
-			SDT_PROBE2(pf, ip, route_to, output, ifp, error);
+		if ((*m)->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) {
+			ip->ip_sum = in_cksum(*m, ip->ip_hl << 2);
+			(*m)->m_pkthdr.csum_flags &= ~CSUM_IP;
 		}
+		m_clrprotoflags(*m);	/* Avoid confusing lower layers. */
+		error = (*ifp->if_output)(ifp, *m, gw, &ro);
+		SDT_PROBE2(pf, ip, route_to, output, ifp, error);
 		goto done;
 	}
 
 	/* Balk when DF bit is set or the interface didn't support TSO. */
-	if ((ip_off & IP_DF) || (m0->m_pkthdr.csum_flags & CSUM_TSO)) {
+	if ((ip_off & IP_DF) || ((*m)->m_pkthdr.csum_flags & CSUM_TSO)) {
 		error = EMSGSIZE;
 		KMOD_IPSTAT_INC(ips_cantfrag);
-		if (pd->act.rt != PF_DUPTO) {
-			if (s && s->nat_rule != NULL) {
-				MPASS(m0 == pd->m);
-				PACKET_UNDO_NAT(pd,
-				    (ip->ip_hl << 2) + (ip_off & IP_OFFMASK),
-				    s);
-			}
-
-			pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
-			   ifp->if_mtu, pd->af, r, pd->act.rtableid);
+		st = pf_find_state_byid(pf_mtag->state_id,
+		    pf_mtag->state_creatorid);
+		if (st != NULL && st->nat_rule != NULL &&
+		    pf_undo_nat_state(*m, AF_INET, st, PF_OUT) != -1) {
+			PF_STATE_UNLOCK(st);
+			printf("%s: sending ICMP_UNREACH_NEEDFRAG\n", __func__);
+			pf_send_icmp(*m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
+			   ifp->if_mtu, AF_INET, NULL, M_GETFIB(*m));
 		}
+
 		SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
-		action = PF_DROP;
 		goto bad;
 	}
 
-	error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist);
+	error = ip_fragment(ip, m, ifp->if_mtu, ifp->if_hwassist);
 	if (error) {
 		SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
-		action = PF_DROP;
 		goto bad;
 	}
 
-	for (; m0; m0 = m1) {
-		m1 = m0->m_nextpkt;
-		m0->m_nextpkt = NULL;
+	for (; (*m); (*m) = m0) {
+		m0 = (*m)->m_nextpkt;
+		(*m)->m_nextpkt = 0;
 		if (error == 0) {
 			m_clrprotoflags(m0);
-			md = m0;
-			pd->pf_mtag = pf_find_mtag(md);
-			error = pf_dummynet_route(pd, s, r, ifp,
-			    gw, &md);
-			if (md != NULL) {
-				error = (*ifp->if_output)(ifp, md, gw, &ro);
-				SDT_PROBE2(pf, ip, route_to, output, ifp, error);
-			}
+			error = (*ifp->if_output)(ifp, *m, gw, &ro);
+			SDT_PROBE2(pf, ip, route_to, output, ifp, error);
 		} else
-			m_freem(m0);
+			goto bad;
 	}
 
 	if (error == 0)
 		KMOD_IPSTAT_INC(ips_fragmented);
 
 done:
-	if (pd->act.rt != PF_DUPTO)
-		pd->m = NULL;
-	else
-		action = PF_PASS;
-	return (action);
-
-bad_locked:
-	if (s)
-		PF_STATE_UNLOCK(s);
+		*m = NULL;
+		return;
 bad:
-	m_freem(m0);
-	goto done;
+		m_freem(*m);
+		goto done;
 }
 #endif /* INET */
 
+/*
+ * Pf equivalent of ip6_forward() and ip6_output(). Used only in route-to
+ * and af-to cases, either directly or when reinjecting from dummynet.
+ */
 #ifdef INET6
-static int
-pf_route6(struct pf_krule *r, struct ifnet *oifp,
-    struct pf_kstate *s, struct pf_pdesc *pd, struct inpcb *inp)
+void
+pf_forward6(struct mbuf **m, u_int8_t dir)
 {
-	struct mbuf		*m0, *md;
-	struct m_tag		*mtag;
-	struct sockaddr_in6	dst;
+	struct ifnet		*ifp;
 	struct ip6_hdr		*ip6;
-	struct ifnet		*ifp = NULL;
-	int			 r_dir;
-	bool			 skip_test = false;
-	int			 action = PF_PASS;
+	struct sockaddr_in6	*gw;
+	struct pf_kstate	*st;
+	struct pf_mtag		*pf_mtag;
+	struct m_tag		*reass_mtag;
 
-	KASSERT(pd->m && r && oifp, ("%s: invalid parameters", __func__));
+	NET_EPOCH_ASSERT();
 
-	SDT_PROBE4(pf, ip6, route_to, entry, pd->m, pd, s, oifp);
+	/* Callers pass only packets tagged PF_MTAG_FLAG_ROUTE_TO. */
+	pf_mtag = pf_find_mtag(*m);
+	MPASS(pf_mtag != NULL);
+	KASSERT(pf_mtag->flags & PF_MTAG_FLAG_ROUTE_TO,
+	    ("Called for non-route output"));
 
-	if (s) {
-		r_dir = s->direction;
-	} else {
-		r_dir = r->direction;
+	/* Routing for packets tagged in PF_IN */
+	ifp = ifnet_byindexgen(pf_mtag->if_index, pf_mtag->if_idxgen);
+	if (ifp == NULL || ifp->if_flags & IFF_DYING) {
+		goto bad;
 	}
+	if (ifp->if_flags & IFF_LOOPBACK)
+		(*m)->m_flags |= M_SKIP_FIREWALL;
 
-	KASSERT(pd->dir == PF_IN || pd->dir == PF_OUT ||
-	    r_dir == PF_IN || r_dir == PF_OUT, ("%s: invalid direction",
-	    __func__));
-
-	if ((pd->pf_mtag == NULL &&
-	    ((pd->pf_mtag = pf_get_mtag(pd->m)) == NULL)) ||
-	    pd->pf_mtag->routed++ > 3) {
-		m0 = pd->m;
-		pd->m = NULL;
-		action = PF_DROP;
-		SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
-		goto bad_locked;
-	}
-
-	if (pd->act.rt_kif != NULL)
-		ifp = pd->act.rt_kif->pfik_ifp;
-
-	if (pd->act.rt == PF_DUPTO) {
-		if ((pd->pf_mtag->flags & PF_MTAG_FLAG_DUPLICATED)) {
-			if (s != NULL) {
-				PF_STATE_UNLOCK(s);
-			}
-			if (ifp == oifp) {
-				/* When the 2nd interface is not skipped */
-				return (action);
-			} else {
-				m0 = pd->m;
-				pd->m = NULL;
-				action = PF_DROP;
-				SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
-				goto bad;
-			}
-		} else {
-			pd->pf_mtag->flags |= PF_MTAG_FLAG_DUPLICATED;
-			if (((m0 = m_dup(pd->m, M_NOWAIT)) == NULL)) {
-				if (s)
-					PF_STATE_UNLOCK(s);
-				return (action);
-			}
+	/*
+	 * Continue with inbound pfil hooks.
+	 * pf_test() won't be repeated thanks to the MTAG_PFIL_NEXT_HOOK tag.
+	 */
+	if (dir == PF_IN) {
+		if (pfil_mbuf_in(V_inet6_pfil_head, m,
+		    (*m)->m_pkthdr.rcvif, NULL) != PFIL_PASS) {
+			return;
 		}
-	} else {
-		if ((pd->act.rt == PF_REPLYTO) == (r_dir == pd->dir)) {
-			if (pd->af == pd->naf) {
-				pf_dummynet(pd, s, r, &pd->m);
-				if (s)
-					PF_STATE_UNLOCK(s);
-				return (action);
-			} else {
-				if (r_dir == PF_IN) {
-					skip_test = true;
-				}
-			}
-		}
-
-		/*
-		 * If we're actually doing route-to and af-to and are in the
-		 * reply direction.
-		 */
-		if (pd->act.rt_kif && pd->act.rt_kif->pfik_ifp &&
-		    pd->af != pd->naf) {
-			if (pd->act.rt == PF_ROUTETO && r->naf != AF_INET6) {
-				/* Un-set ifp so we do a plain route lookup. */
-				ifp = NULL;
-			}
-			if (pd->act.rt == PF_REPLYTO && r->naf != AF_INET) {
-				/* Un-set ifp so we do a plain route lookup. */
-				ifp = NULL;
-			}
-		}
-		m0 = pd->m;
-	}
-
-	ip6 = mtod(m0, struct ip6_hdr *);
-
-	bzero(&dst, sizeof(dst));
-	dst.sin6_family = AF_INET6;
-	dst.sin6_len = sizeof(dst);
-	pf_addrcpy((struct pf_addr *)&dst.sin6_addr, &pd->act.rt_addr,
-	    AF_INET6);
-
-	if (pd->dir == PF_IN) {
+		ip6 = mtod(*m, struct ip6_hdr *);
 		if (ip6->ip6_hlim <= IPV6_HLIMDEC) {
-			if (r->rt != PF_DUPTO)
-				pf_send_icmp(m0, ICMP6_TIME_EXCEEDED,
-				    ICMP6_TIME_EXCEED_TRANSIT, 0, pd->af, r,
-				    pd->act.rtableid);
-			action = PF_DROP;
-			goto bad_locked;
+			pf_send_icmp(*m, ICMP6_TIME_EXCEEDED,
+			    ICMP6_TIME_EXCEED_TRANSIT, 0, AF_INET6, NULL,
+			    M_GETFIB(*m));
+			goto bad;
 		}
 		ip6->ip6_hlim -= IPV6_HLIMDEC;
 	}
 
-	if (s != NULL) {
-		if (ifp == NULL && (pd->af != pd->naf)) {
-			const struct nhop_object *nh;
-			nh = fib6_lookup(M_GETFIB(m0), &ip6->ip6_dst, 0, NHR_NONE, 0);
-			if (nh) {
-				ifp = nh->nh_ifp;
-
-				/* Use the gateway if needed. */
-				if (nh->nh_flags & NHF_GATEWAY)
-					bcopy(&nh->gw6_sa.sin6_addr, &dst.sin6_addr,
-					    sizeof(dst.sin6_addr));
-				else
-					dst.sin6_addr = ip6->ip6_dst;
-
-				/*
-				 * Bind to the correct interface if we're
-				 * if-bound. We don't know which interface
-				 * that will be until here, so we've inserted
-				 * the state on V_pf_all. Fix that now.
-				 */
-				if (s->kif == V_pfi_all && ifp != NULL &&
-				    r->rule_flag & PFRULE_IFBOUND)
-					s->kif = ifp->if_pf_kif;
-			}
-		}
-
-		if (r->rule_flag & PFRULE_IFBOUND &&
-		    pd->act.rt == PF_REPLYTO &&
-		    s->kif == V_pfi_all) {
-			s->kif = pd->act.rt_kif;
-			s->orig_kif = oifp->if_pf_kif;
-		}
-
-		PF_STATE_UNLOCK(s);
+	/*
+	 * Forward through outbound pfil hooks. This will cover outbound
+	 * filtering by pf. pf_test() called by the outbound pfil hook will
+	 * call pf_forward6() on its own and send the packet out. Unless
+	 * there is "set skip" on the outbound interface or the mbuf is tagged
+	 * with M_SKIP_FIREWALL, then the inbound pf_forward6() will send
+	 * the packet out.
+	 */
+	if (pfil_mbuf_out(V_inet6_pfil_head, m, ifp, NULL) != PFIL_PASS) {
+		return;
 	}
 
-	if (pd->af != pd->naf) {
-		struct udphdr *uh = &pd->hdr.udp;
-
-		if (pd->proto == IPPROTO_UDP && uh->uh_sum == 0) {
-			uh->uh_sum = in6_cksum_pseudo(ip6,
-			    ntohs(uh->uh_ulen), IPPROTO_UDP, 0);
-			m_copyback(m0, pd->off, sizeof(*uh), pd->hdr.any);
-		}
-	}
-
-	if (ifp == NULL) {
-		m0 = pd->m;
-		pd->m = NULL;
-		action = PF_DROP;
-		SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
+	/* Routing for packets tagged in direction PF_OUT */
+	pf_mtag = pf_find_mtag(*m);
+	ifp = ifnet_byindexgen(pf_mtag->if_index, pf_mtag->if_idxgen);
+	if (ifp == NULL || ifp->if_flags & IFF_DYING) {
 		goto bad;
 	}
-
-	if (r->rt == PF_DUPTO)
-		skip_test = true;
-
-	if (pd->dir == PF_IN && !skip_test) {
-		if (pf_test(AF_INET6, PF_OUT, PFIL_FWD | PF_PFIL_NOREFRAGMENT,
-		    ifp, &m0, inp, &pd->act) != PF_PASS) {
-			action = PF_DROP;
-			SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
-			goto bad;
-		} else if (m0 == NULL) {
-			action = PF_DROP;
-			SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
-			goto done;
-		}
-		if (m0->m_len < sizeof(struct ip6_hdr)) {
-			DPFPRINTF(PF_DEBUG_URGENT,
-			    "%s: m0->m_len < sizeof(struct ip6_hdr)",
-			    __func__);
-			action = PF_DROP;
-			SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
-			goto bad;
-		}
-		ip6 = mtod(m0, struct ip6_hdr *);
-	}
-
 	if (ifp->if_flags & IFF_LOOPBACK)
-		m0->m_flags |= M_SKIP_FIREWALL;
+		(*m)->m_flags |= M_SKIP_FIREWALL;
 
-	if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6 &
+	gw = (struct sockaddr_in6*)(&(pf_mtag->dst));
+
+	ip6 = mtod(*m, struct ip6_hdr *);
+
+	if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6 &
 	    ~ifp->if_hwassist) {
-		uint32_t plen = m0->m_pkthdr.len - sizeof(*ip6);
-		in6_delayed_cksum(m0, plen, sizeof(struct ip6_hdr));
-		m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
-	}
-
-	if (pd->dir == PF_IN) {
-		uint16_t	 tmp;
-		/*
-		 * Make sure dummynet gets the correct direction, in case it needs to
-		 * re-inject later.
-		 */
-		pd->dir = PF_OUT;
-
-		/*
-		 * The following processing is actually the rest of the inbound processing, even
-		 * though we've marked it as outbound (so we don't look through dummynet) and it
-		 * happens after the outbound processing (pf_test(PF_OUT) above).
-		 * Swap the dummynet pipe numbers, because it's going to come to the wrong
-		 * conclusion about what direction it's processing, and we can't fix it or it
-		 * will re-inject incorrectly. Swapping the pipe numbers means that its incorrect
-		 * decision will pick the right pipe, and everything will mostly work as expected.
-		 */
-		tmp = pd->act.dnrpipe;
-		pd->act.dnrpipe = pd->act.dnpipe;
-		pd->act.dnpipe = tmp;
+		uint32_t plen = (*m)->m_pkthdr.len - sizeof(*ip6);
+		in6_delayed_cksum(*m, plen, sizeof(struct ip6_hdr));
+		(*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
 	}
 
 	/*
 	 * If the packet is too large for the outgoing interface,
 	 * send back an icmp6 error.
 	 */
-	if (IN6_IS_SCOPE_EMBED(&dst.sin6_addr))
-		dst.sin6_addr.s6_addr16[1] = htons(ifp->if_index);
-	mtag = m_tag_find(m0, PACKET_TAG_PF_REASSEMBLED, NULL);
-	if (mtag != NULL) {
+	if (IN6_IS_SCOPE_EMBED(&(gw->sin6_addr)))
+		gw->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
+	reass_mtag = m_tag_find(*m, PACKET_TAG_PF_REASSEMBLED, NULL);
+	if (reass_mtag != NULL) {
 		int ret __sdt_used;
-		ret = pf_refragment6(ifp, &m0, mtag, ifp, true);
+		ret = pf_refragment6(ifp, m, reass_mtag, ifp,
+		    PF_REFRAGMENT6_PF_FORWARD);
 		SDT_PROBE2(pf, ip6, route_to, output, ifp, ret);
 		goto done;
 	}
 
-	if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
-		md = m0;
-		pf_dummynet_route(pd, s, r, ifp, sintosa(&dst), &md);
-		if (md != NULL) {
-			int ret __sdt_used;
-			ret = nd6_output_ifp(ifp, ifp, md, &dst, NULL);
-			SDT_PROBE2(pf, ip6, route_to, output, ifp, ret);
-		}
+	if ((u_long)(*m)->m_pkthdr.len <= ifp->if_mtu) {
+		int ret __sdt_used;
+		ret = nd6_output_ifp(ifp, ifp, *m, gw, NULL);
+		SDT_PROBE2(pf, ip6, route_to, output, ifp, ret);
 	}
 	else {
 		in6_ifstat_inc(ifp, ifs6_in_toobig);
-		if (pd->act.rt != PF_DUPTO) {
-			if (s && s->nat_rule != NULL) {
-				MPASS(m0 == pd->m);
-				PACKET_UNDO_NAT(pd,
-				    ((caddr_t)ip6 - m0->m_data) +
-				    sizeof(struct ip6_hdr), s);
-			}
-
-			if (r->rt != PF_DUPTO)
-				pf_send_icmp(m0, ICMP6_PACKET_TOO_BIG, 0,
-				    ifp->if_mtu, pd->af, r, pd->act.rtableid);
+		st = pf_find_state_byid(pf_mtag->state_id,
+		    pf_mtag->state_creatorid);
+		if (st != NULL) {
+			/* st comes back locked: unlock on every path. */
+			bool undone = st->nat_rule != NULL &&
+			    pf_undo_nat_state(*m, AF_INET6, st, PF_OUT) != -1;
+			PF_STATE_UNLOCK(st);
+			if (undone)
+				pf_send_icmp(*m, ICMP6_PACKET_TOO_BIG, 0,
+				    ifp->if_mtu, AF_INET6, NULL, M_GETFIB(*m));
 		}
-		action = PF_DROP;
 		SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
 		goto bad;
 	}
 
 done:
-	if (pd->act.rt != PF_DUPTO)
-		pd->m = NULL;
-	else
-		action = PF_PASS;
-	return (action);
-
-bad_locked:
-	if (s)
-		PF_STATE_UNLOCK(s);
+	*m = NULL;
+	return;
 bad:
-	m_freem(m0);
+	m_freem(*m);
 	goto done;
 }
 #endif /* INET6 */
 
+/*
+ * Stamp the pf mbuf tag with the route-to target from pf_rule_actions.
+ * Shared by pf_route_to() and pf_dup_to().
+ */
+static __inline void
+pf_route_tag_to(struct pf_kstate *st, struct pf_mtag *pf_mtag,
+    struct pf_rule_actions *act, sa_family_t naf)
+{
+	struct sockaddr_in6	*sin6 = (struct sockaddr_in6 *)(&pf_mtag->dst);
+	struct sockaddr_in	*sin = (struct sockaddr_in *)(&pf_mtag->dst);
+
+	MPASS(pf_mtag != NULL);
+	MPASS(act != NULL);
+	MPASS(act->rt_kif != NULL);
+
+	/* Outbound states: kept for pf_undo_nat_state() and ICMP errors. */
+	if (st != NULL && st->direction == PF_OUT) {
+		pf_mtag->state_creatorid = st->creatorid;
+		pf_mtag->state_id = st->id;
+	}
+	pf_mtag->if_idxgen = act->rt_kif->pfik_ifp->if_idxgen;
+	pf_mtag->if_index = act->rt_kif->pfik_ifp->if_index;
+	pf_mtag->flags |= PF_MTAG_FLAG_ROUTE_TO;
+
+	/* The gateway is stored as a sockaddr of family naf. */
+	switch (naf) {
+#ifdef INET
+	case AF_INET:
+		sin->sin_len = sizeof(*sin);
+		sin->sin_family = AF_INET;
+		sin->sin_addr = act->rt_addr.v4;
+		break;
+#endif /* INET */
+#ifdef INET6
+	case AF_INET6:
+		sin6->sin6_len = sizeof(*sin6);
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_addr = act->rt_addr.v6;
+		break;
+#endif /* INET6 */
+	}
+}
+
+/*
+ * Duplicate a packet, send it over specified gateway and interface.
+ * Outbound filtering and dummynet are skipped, this is to be like port
+ * mirroring on a switch.
+ *
+ * Failures are never reported to the caller: the original packet might
+ * still get forwarded just fine.
+ */
+static void
+pf_dup_to(struct pf_pdesc *pd, struct pf_kstate *st)
+{
+	struct mbuf	*md;
+	struct pf_mtag	*md_mtag;
+
+	/* Nothing to mirror to while the target interface is absent. */
+	if (pd->act.rt_kif == NULL || pd->act.rt_kif->pfik_ifp == NULL)
+		return;
+
+	md = m_dup(pd->m, M_NOWAIT);
+	if (md == NULL)
+		return;
+
+	/* The copy must not be filtered again on its way out. */
+	md->m_flags |= M_SKIP_FIREWALL;
+	md_mtag = pf_get_mtag(md);
+	if (md_mtag == NULL) {
+		m_freem(md);
+		return;
+	}
+
+	/* Tag the copy, then hand it to the forwarding function for naf. */
+	pf_route_tag_to(st, md_mtag, &(pd->act), pd->naf);
+	switch (pd->naf) {
+#ifdef INET
+	case AF_INET:
+		pf_forward4(&md, PF_OUT);
+		break;
+#endif /* INET */
+#ifdef INET6
+	case AF_INET6:
+		pf_forward6(&md, PF_OUT);
+		break;
+#endif /* INET6 */
+	default:
+		unhandled_af(pd->naf);
+	}
+}
+
+/*
+ * Tag packet for sending it over specific gateway and interface.
+ * We can't use pd here for all attributes because the function can be called
+ * from pf_send_tcp() where pd does not exist for the built packet.
+ *
+ * Returns 0 when tagged or when no routing applies, -1 when tagging fails.
+ */
+static int
+pf_route_to(struct mbuf *m, struct pf_kstate *st, const struct pf_krule *r,
+    struct pf_rule_actions *act, struct ifnet *oifp, u_int8_t dir,
+    sa_family_t af, sa_family_t naf, struct pf_mtag **pf_mtag)
+{
+	int			 r_dir;
+
+	KASSERT(m && r, ("%s: invalid parameters", __func__));
+
+	SDT_PROBE3(pf, ip, route_to, entry, m, r, st);
+
+	r_dir = (st != NULL) ? st->direction : r->direction;
+
+	KASSERT(dir == PF_IN || dir == PF_OUT ||
+	    r_dir == PF_IN || r_dir == PF_OUT, ("%s: invalid direction",
+	    __func__));
+
+	/*
+	 * In some cases routing is not applied:
+	 * - reply-to in forward direction
+	 * - route-to in reverse direction
+	 */
+	if (af != naf) {
+		if ((act->rt == PF_REPLYTO) == (r->af != naf))
+			return (0);
+	} else if ((act->rt == PF_REPLYTO) == (r_dir == dir)) {
+		return (0);
+	}
+
+	/* The target interface may not exist (yet): let the caller drop. */
+	if (act->rt_kif == NULL || act->rt_kif->pfik_ifp == NULL)
+		return (-1);
+
+	/*
+	 * Bind to the correct interface if we're if-bound.
+	 * We don't know which interface that will be until here,
+	 * so we've inserted the state on V_pf_all. Fix that now.
+	 */
+	if (st != NULL && st->kif == V_pfi_all &&
+	    r->rule_flag & PFRULE_IFBOUND) {
+		if (act->rt == PF_ROUTETO) {
+			MPASS(af != naf);
+			MPASS(st->direction == PF_IN);
+		} else { /* PF_REPLYTO */
+			st->orig_kif = oifp->if_pf_kif;
+		}
+		st->kif = act->rt_kif;
+	}
+
+	if (act->rt_kif->pfik_ifp->if_flags & IFF_LOOPBACK)
+		m->m_flags |= M_SKIP_FIREWALL;
+
+	(*pf_mtag) = pf_get_mtag(m);
+	if (*pf_mtag == NULL) {
+		return (-1);
+	}
+	pf_route_tag_to(st, *pf_mtag, act, naf);
+
+	return (0);
+}
+
+/*
+ * Tag an af-to packet with the FIB next hop for its naf destination address.
+ * Returns 0 on success, -1 when no tag can be allocated or no route exists.
+ */
+static int
+pf_route_tag_afto(struct pf_pdesc *pd, struct pf_kstate *st, struct pf_krule *r)
+{
+	const struct nhop_object	*nh = NULL;
+	struct ifnet			*ifp = NULL;
+	struct ip			*ip;
+	struct ip6_hdr			*ip6;
+	struct sockaddr_in		*gw4;
+	struct sockaddr_in6		*gw6;
+
+	MPASS(st != NULL);
+	MPASS(pd->af != pd->naf);
+
+	pd->pf_mtag = pf_get_mtag(pd->m);
+	if (pd->pf_mtag == NULL)
+		return (-1);
+	pd->pf_mtag->flags |= PF_MTAG_FLAG_ROUTE_TO;
+	gw4 = (struct sockaddr_in *)(&pd->pf_mtag->dst);
+	gw6 = (struct sockaddr_in6 *)(&pd->pf_mtag->dst);
+
+	switch (pd->naf) {
+#ifdef INET
+	case AF_INET:
+		ip = mtod(pd->m, struct ip *);
+		nh = fib4_lookup(M_GETFIB(pd->m), ip->ip_dst, 0, NHR_NONE, 0);
+		if (nh == NULL)
+			return (-1);
+		ifp = nh->nh_ifp;
+		gw4->sin_family = AF_INET;
+		gw4->sin_len = sizeof(struct sockaddr_in);
+		/* Use the gateway if needed. */
+		if (nh->nh_flags & NHF_GATEWAY) {
+			gw4->sin_addr = nh->gw4_sa.sin_addr;
+			pd->pf_mtag->ro_flags |= RT_HAS_GW;
+		} else
+			gw4->sin_addr = ip->ip_dst;
+		break;
+#endif /* INET */
+#ifdef INET6
+	case AF_INET6:
+		ip6 = mtod(pd->m, struct ip6_hdr *);
+		nh = fib6_lookup(M_GETFIB(pd->m), &ip6->ip6_dst, 0, NHR_NONE, 0);
+		if (nh == NULL)
+			return (-1);
+		ifp = nh->nh_ifp;
+		gw6->sin6_family = AF_INET6;
+		gw6->sin6_len = sizeof(struct sockaddr_in6);
+		/* Use the gateway if needed; copy only the in6_addr. */
+		if (nh->nh_flags & NHF_GATEWAY) {
+			gw6->sin6_addr = nh->gw6_sa.sin6_addr;
+			pd->pf_mtag->ro_flags |= RT_HAS_GW;
+		} else
+			gw6->sin6_addr = ip6->ip6_dst;
+		break;
+#endif /* INET6 */
+	default:
+		unhandled_af(pd->naf);
+	}
+
+	MPASS(ifp != NULL);
+
+	/*
+	 * Bind to the correct interface if we're if-bound on inbound af-to.
+	 * We don't know which interface that will be until here,
+	 * so we've inserted the state on V_pf_all. Fix that now.
+	 */
+	if (st->kif == V_pfi_all && r->rule_flag & PFRULE_IFBOUND) {
+		MPASS(st->direction == PF_IN);
+		st->kif = ifp->if_pf_kif;
+	}
+
+	pd->pf_mtag->if_index = ifp->if_index;
+	pd->pf_mtag->if_idxgen = ifp->if_idxgen;
+	return (0);
+}
+
 /*
  * FreeBSD supports cksum offloads for the following drivers.
  *  em(4), fxp(4), lge(4), nge(4), re(4), ti(4), txp(4), xl(4)
@@ -9661,9 +9702,6 @@
 		dndir = pd->dir;
 	}
 
-	if (pd->pf_mtag->flags & PF_MTAG_FLAG_DUMMYNETED)
-		return (false);
-
 	memset(dnflow, 0, sizeof(*dnflow));
 
 	if (pd->dport != NULL)
@@ -9757,8 +9795,6 @@
 {
 	struct m_tag *mtag;
 
-	pf_mtag->flags &= ~PF_MTAG_FLAG_DUMMYNET;
-
 	/* dummynet adds this tag, but pf does not need it,
 	 * and keeping it creates unexpected behavior,
 	 * e.g. in case of divert(4) usage right after dummynet. */
@@ -9768,16 +9804,7 @@
 }
 
 static int
-pf_dummynet(struct pf_pdesc *pd, struct pf_kstate *s,
-    struct pf_krule *r, struct mbuf **m0)
-{
-	return (pf_dummynet_route(pd, s, r, NULL, NULL, m0));
-}
-
-static int
-pf_dummynet_route(struct pf_pdesc *pd, struct pf_kstate *s,
-    struct pf_krule *r, struct ifnet *ifp, const struct sockaddr *sa,
-    struct mbuf **m0)
+pf_dummynet(struct pf_pdesc *pd, struct pf_kstate *s, struct pf_krule *r)
 {
 	struct ip_fw_args dnflow;
 
@@ -9787,38 +9814,18 @@
 		return (0);
 
 	if (ip_dn_io_ptr == NULL) {
-		m_freem(*m0);
-		*m0 = NULL;
+		m_freem(pd->m);
+		pd->m = NULL;
 		return (ENOMEM);
 	}
 
 	if (pd->pf_mtag == NULL &&
-	    ((pd->pf_mtag = pf_get_mtag(*m0)) == NULL)) {
-		m_freem(*m0);
-		*m0 = NULL;
+	    ((pd->pf_mtag = pf_get_mtag(pd->m)) == NULL)) {
+		m_freem(pd->m);
+		pd->m = NULL;
 		return (ENOMEM);
 	}
 
-	if (ifp != NULL) {
-		pd->pf_mtag->flags |= PF_MTAG_FLAG_ROUTE_TO;
-
-		pd->pf_mtag->if_index = ifp->if_index;
-		pd->pf_mtag->if_idxgen = ifp->if_idxgen;
-
-		MPASS(sa != NULL);
-
-		switch (sa->sa_family) {
-		case AF_INET:
-			memcpy(&pd->pf_mtag->dst, sa,
-			    sizeof(struct sockaddr_in));
-			break;
-		case AF_INET6:
-			memcpy(&pd->pf_mtag->dst, sa,
-			    sizeof(struct sockaddr_in6));
-			break;
-		}
-	}
-
 	if (s != NULL && s->nat_rule != NULL &&
 	    s->nat_rule->action == PF_RDR &&
 	    (
@@ -9831,17 +9838,11 @@
 		 * as being local. Otherwise it might get dropped
 		 * if dummynet re-injects.
 		 */
-		(*m0)->m_pkthdr.rcvif = V_loif;
+		pd->m->m_pkthdr.rcvif = V_loif;
 	}
 
 	if (pf_pdesc_to_dnflow(pd, r, s, &dnflow)) {
-		pd->pf_mtag->flags |= PF_MTAG_FLAG_DUMMYNET;
-		pd->pf_mtag->flags |= PF_MTAG_FLAG_DUMMYNETED;
-		ip_dn_io_ptr(m0, &dnflow);
-		if (*m0 != NULL) {
-			pd->pf_mtag->flags &= ~PF_MTAG_FLAG_ROUTE_TO;
-			pf_dummynet_flag_remove(*m0, pd->pf_mtag);
-		}
+		ip_dn_io_ptr(&(pd->m), &dnflow);
 	}
 
 	return (0);
@@ -10238,8 +10239,6 @@
 	PF_RULES_ASSERT();
 
 	TAILQ_INIT(&pd->sctp_multihome_jobs);
-	if (default_actions != NULL)
-		memcpy(&pd->act, default_actions, sizeof(pd->act));
 
 	if (pd->pf_mtag && pd->pf_mtag->dnpipe) {
 		pd->act.dnpipe = pd->pf_mtag->dnpipe;
@@ -10687,34 +10686,6 @@
 
 	pf_init_pdesc(&pd, *m0);
 
-	if (pd.pf_mtag != NULL && (pd.pf_mtag->flags & PF_MTAG_FLAG_ROUTE_TO)) {
-		pd.pf_mtag->flags &= ~PF_MTAG_FLAG_ROUTE_TO;
-
-		ifp = ifnet_byindexgen(pd.pf_mtag->if_index,
-		    pd.pf_mtag->if_idxgen);
-		if (ifp == NULL || ifp->if_flags & IFF_DYING) {
-			m_freem(*m0);
-			*m0 = NULL;
-			return (PF_PASS);
-		}
-		(ifp->if_output)(ifp, *m0, sintosa(&pd.pf_mtag->dst), NULL);
-		*m0 = NULL;
-		return (PF_PASS);
-	}
-
-	if (ip_dn_io_ptr != NULL && pd.pf_mtag != NULL &&
-	    pd.pf_mtag->flags & PF_MTAG_FLAG_DUMMYNET) {
-		/* Dummynet re-injects packets after they've
-		 * completed their delay. We've already
-		 * processed them, so pass unconditionally. */
-
-		/* But only once. We may see the packet multiple times (e.g.
-		 * PFIL_IN/PFIL_OUT). */
-		pf_dummynet_flag_remove(pd.m, pd.pf_mtag);
-
-		return (PF_PASS);
-	}
-
 	PF_RULES_RLOCK();
 
 	if (pf_setup_pdesc(af, dir, &pd, m0, &action, &reason, kif) == -1) {
@@ -10822,11 +10793,12 @@
 
 				action = pf_test(af, dir, pflags, ifp, &msyn, inp);
 				m_freem(msyn);
-				if (action != PF_PASS)
+				if (action != PF_PASS && action != PF_AFRT)
 					break;
 
 				action = pf_test_state(&s, &pd, &reason);
-				if (action != PF_PASS || s == NULL) {
+				if ((action != PF_PASS && action != PF_AFRT) ||
+				    s == NULL) {
 					action = PF_DROP;
 					break;
 				}
@@ -11058,62 +11030,46 @@
 
 	switch (action) {
 	case PF_SYNPROXY_DROP:
-		m_freem(*m0);
+		m_freem(pd.m);
 	case PF_DEFER:
-		*m0 = NULL;
+		pd.m = NULL;
 		action = PF_PASS;
-		break;
+		goto eat_pkt;
 	case PF_DROP:
-		m_freem(*m0);
-		*m0 = NULL;
-		break;
+		m_freem(pd.m);
+		pd.m = NULL;
+		goto eat_pkt;
 	case PF_AFRT:
-		if (pf_translate_af(&pd)) {
-			*m0 = pd.m;
+		if (pf_translate_af(&pd, s)) {
 			action = PF_DROP;
 			break;
 		}
-#ifdef INET
-		if (pd.naf == AF_INET) {
-			action = pf_route(r, kif->pfik_ifp, s, &pd,
-			    inp);
-		}
-#endif /* INET */
-#ifdef INET6
-		if (pd.naf == AF_INET6) {
-			action = pf_route6(r, kif->pfik_ifp, s, &pd,
-			    inp);
-}
-#endif /* INET6 */
-		*m0 = pd.m;
-		goto out;
-		break;
+		/* fallthrough */
 	default:
 		if (pd.act.rt) {
-			switch (af) {
-#ifdef INET
-			case AF_INET:
-				/* pf_route() returns unlocked. */
-				action = pf_route(r, kif->pfik_ifp, s, &pd,
-				    inp);
+			if (pd.act.rt == PF_DUPTO)
+				pf_dup_to(&pd, s);
+			else if (pf_route_to(pd.m, s, r,
+			    s ? &(s->act) : &(pd.act), kif->pfik_ifp,
+			    pd.dir, pd.af, pd.naf, &(pd.pf_mtag))) {
+				action = PF_DROP;
+				REASON_SET(&reason, PFRES_MEMORY);
 				break;
-#endif /* INET */
-#ifdef INET6
-			case AF_INET6:
-				/* pf_route6() returns unlocked. */
-				action = pf_route6(r, kif->pfik_ifp, s, &pd,
-				    inp);
-				break;
-#endif /* INET6 */
 			}
-			*m0 = pd.m;
-			goto out;
 		}
-		if (pf_dummynet(&pd, s, r, m0) != 0) {
+		if (action == PF_AFRT &&
+		    (pd.pf_mtag == NULL ||
+		    !(pd.pf_mtag->flags & PF_MTAG_FLAG_ROUTE_TO)) &&
+		    pf_route_tag_afto(&pd, s, r)) {
+			action = PF_DROP;
+			REASON_SET(&reason, PFRES_NOROUTE);
+			break;
+		}
+		if (pf_dummynet(&pd, s, r) != 0) {
 			action = PF_DROP;
 			REASON_SET(&reason, PFRES_MEMORY);
+			break;
 		}
-		break;
 	}
 
 eat_pkt:
@@ -11129,17 +11085,61 @@
 	if (s)
 		PF_STATE_UNLOCK(s);
 
-out:
+	if (pd.m == NULL)
+		goto finish;
+
+	/*
+	 * For af-to, the packet must have passed through either
+	 * pf_route_to() or pf_route_tag_afto().
+	 */
+	KASSERT(action != PF_AFRT || (pd.pf_mtag &&
+	    pd.pf_mtag->flags & PF_MTAG_FLAG_ROUTE_TO),
+	    ("%s: af-to requested but no PF_MTAG_FLAG_ROUTE_TO applied\n",
+	    __func__));
+
+	/*
+	 * For route-to and af-to we can't continue on the normal path of packet
+	 * forwarding. Even though ip(6)?_(try)?route\(\) could be modified
+	 * to deal with routing information embedded in pf_mtag, they can't
+	 * deal with AF of the packet changing. Instead send the packet
+	 * to custom forwarding functions. Unless it's the recreated SYN for
+	 * syncookie operation - this must pass through pf_test() and only
+	 * undergo inbound filtering. The function pf_synproxy() will take care
+	 * of sending the new SYN out through outbound filtering and routing.
+	 */
+	if (pd.pf_mtag && pd.pf_mtag->flags & PF_MTAG_FLAG_ROUTE_TO &&
+	    !(pd.pf_mtag->flags & PF_MTAG_FLAG_SYNCOOKIE_RECREATED)) {
+		if (pd.af != pd.naf)
+			pf_skip_hook(pd.m, pd.naf, pd.dir);
+		switch (pd.naf) {
+#ifdef INET
+		case AF_INET:
+			pf_forward4(&(pd.m), pd.dir);
+			break;
+#endif /* INET */
+#ifdef INET6
+		case AF_INET6:
+			pf_forward6(&(pd.m), pd.dir);
+			break;
+#endif /* INET6 */
+		default:
+			unhandled_af(pd.naf);
+		}
+	}
+
 #ifdef INET6
 	/* If reassembled packet passed, create new fragments. */
-	if (af == AF_INET6 && action == PF_PASS && *m0 && dir == PF_OUT &&
+	if (af == AF_INET6 && action == PF_PASS && pd.m && dir == PF_OUT &&
 	    (! (pflags & PF_PFIL_NOREFRAGMENT)) &&
 	    (mtag = m_tag_find(pd.m, PACKET_TAG_PF_REASSEMBLED, NULL)) != NULL)
-		action = pf_refragment6(ifp, m0, mtag, NULL, pflags & PFIL_FWD);
+		action = pf_refragment6(ifp, &(pd.m), mtag, NULL,
+		    (pflags & PFIL_FWD) ? PF_REFRAGMENT6_IP6_FORWARD : PF_REFRAGMENT6_IP6_OUTPUT);
 #endif /* INET6 */
 
 	pf_sctp_multihome_delayed(&pd, kif, s, action);
 
+finish:
+	*m0 = pd.m;
 	return (action);
 }
 #endif /* INET || INET6 */
diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c
--- a/sys/netpfil/pf/pf_ioctl.c
+++ b/sys/netpfil/pf/pf_ioctl.c
@@ -241,18 +241,6 @@
     int flags, void *ruleset __unused, struct inpcb *inp);
 static pfil_return_t pf_eth_check_out(struct mbuf **m, struct ifnet *ifp,
     int flags, void *ruleset __unused, struct inpcb *inp);
-#ifdef INET
-static pfil_return_t pf_check_in(struct mbuf **m, struct ifnet *ifp,
-    int flags, void *ruleset __unused, struct inpcb *inp);
-static pfil_return_t pf_check_out(struct mbuf **m, struct ifnet *ifp,
-    int flags, void *ruleset __unused, struct inpcb *inp);
-#endif
-#ifdef INET6
-static pfil_return_t pf_check6_in(struct mbuf **m, struct ifnet *ifp,
-    int flags, void *ruleset __unused, struct inpcb *inp);
-static pfil_return_t pf_check6_out(struct mbuf **m, struct ifnet *ifp,
-    int flags, void *ruleset __unused, struct inpcb *inp);
-#endif
 
 static void		hook_pf_eth(void);
 static void		hook_pf(void);
@@ -6566,6 +6554,7 @@
 
 	switch (chk) {
 	case PF_PASS:
+	case PF_AFRT:
 		if (*m == NULL)
 			return (PFIL_CONSUMED);
 		else
@@ -6831,6 +6820,36 @@
 	atomic_store_bool(&V_pf_pfil_hooked, false);
 }
 
+/*
+ * When reinjecting the AF-translated packet into the network stack,
+ * update MTAG_PFIL/MTAG_PFIL_NEXT_HOOK as if pf filtering
+ * has already happened for the new AF.
+ * naf is the address family the packet was translated to; dir is the
+ * direction (PF_IN or PF_OUT) whose pf hook is to be skipped.
+ */
+void
+pf_skip_hook(struct mbuf *m, sa_family_t naf, u_int8_t dir)
+{
+	switch (naf) {
+#ifdef INET
+	case AF_INET:
+		if (dir == PF_IN)
+			pfil_mbuf_skip_hook(m, V_pf_ip4_in_hook);
+		else
+			pfil_mbuf_skip_hook(m, V_pf_ip4_out_hook);
+		break;
+#endif /* INET */
+#ifdef INET6
+	case AF_INET6:
+		if (dir == PF_IN)
+			pfil_mbuf_skip_hook(m, V_pf_ip6_in_hook);
+		else
+			pfil_mbuf_skip_hook(m, V_pf_ip6_out_hook);
+		break;
+#endif /* INET6 */
+	}
+}
+
 static void
 pf_load_vnet(void)
 {
@@ -6873,6 +6892,9 @@
 
 	pfi_initialize();
 
+	pf_forward4_p = pf_forward4;
+	pf_forward6_p = pf_forward6;
+
 	return (0);
 }
 
@@ -6973,6 +6995,9 @@
 
 	pf_nl_unregister();
 
+	pf_forward4_p = NULL;
+	pf_forward6_p = NULL;
+
 	if (pf_dev != NULL)
 		destroy_dev(pf_dev);
 
diff --git a/sys/netpfil/pf/pf_mtag.h b/sys/netpfil/pf/pf_mtag.h
--- a/sys/netpfil/pf/pf_mtag.h
+++ b/sys/netpfil/pf/pf_mtag.h
@@ -37,11 +37,11 @@
 
 /* pf_mtag -> flags */
 #define	PF_MTAG_FLAG_ROUTE_TO			0x01
-#define	PF_MTAG_FLAG_DUMMYNET			0x02
+/* unused					0x02 */
 #define	PF_MTAG_FLAG_TRANSLATE_LOCALHOST	0x04
 #define	PF_MTAG_FLAG_PACKET_LOOPED		0x08
 #define	PF_MTAG_FLAG_FASTFWD_OURS_PRESENT	0x10
-#define	PF_MTAG_FLAG_DUMMYNETED			0x20
+/* unused					0x20 */
 #define	PF_MTAG_FLAG_DUPLICATED			0x40
 #define	PF_MTAG_FLAG_SYNCOOKIE_RECREATED	0x80
 
@@ -54,9 +54,12 @@
 	u_int8_t	 routed;
 	u_int16_t	 dnpipe;
 	u_int32_t	 dnflags;
-	u_int16_t	 if_index;	/* For ROUTE_TO */
-	u_int16_t	 if_idxgen;	/* For ROUTE_TO */
-	struct sockaddr_storage	dst;	/* For ROUTE_TO */
+	uint64_t	 state_id;
+	uint32_t	 state_creatorid;
+	uint16_t	 ro_flags;	/* For pf_route_to and pf_route_afto */
+	u_int16_t	 if_index;	/* For pf_route_to and pf_route_afto */
+	u_int16_t	 if_idxgen;	/* For pf_route_to and pf_route_afto */
+	struct sockaddr_storage	dst;	/* For pf_route_to and pf_route_afto */
 };
 
 static __inline struct pf_mtag *
diff --git a/sys/netpfil/pf/pf_norm.c b/sys/netpfil/pf/pf_norm.c
--- a/sys/netpfil/pf/pf_norm.c
+++ b/sys/netpfil/pf/pf_norm.c
@@ -1032,7 +1032,7 @@
 
 int
 pf_refragment6(struct ifnet *ifp, struct mbuf **m0, struct m_tag *mtag,
-    struct ifnet *rt, bool forward)
+    struct ifnet *rt, pf_refragment6_forward_t forward)
 {
 	struct mbuf		*m = *m0, *t;
 	struct ip6_hdr		*hdr;
@@ -1123,12 +1123,21 @@
 				icmp6_error(m, ICMP6_PACKET_TOO_BIG, 0,
 				    if_getmtu(ifp));
 			}
-		} else if (forward) {
-			MPASS(m->m_pkthdr.rcvif != NULL);
-			ip6_forward(m, 0);
 		} else {
-			(void)ip6_output(m, NULL, NULL, 0, NULL, NULL,
-			    NULL);
+			switch (forward) {
+			case PF_REFRAGMENT6_IP6_OUTPUT:
+				(void)ip6_output(m, NULL, NULL, 0, NULL, NULL,
+				    NULL);
+				break;
+			case PF_REFRAGMENT6_IP6_FORWARD:
+				MPASS(m->m_pkthdr.rcvif != NULL);
+				ip6_forward(m, 0);
+				break;
+			case PF_REFRAGMENT6_PF_FORWARD:
+				MPASS(m->m_pkthdr.rcvif != NULL);
+				pf_forward6(&m, PF_OUT);
+				break;
+			}
 		}
 	}
 
diff --git a/sys/netpfil/pf/pf_syncookies.c b/sys/netpfil/pf/pf_syncookies.c
--- a/sys/netpfil/pf/pf_syncookies.c
+++ b/sys/netpfil/pf/pf_syncookies.c
@@ -43,7 +43,7 @@
  * We leave synflood mode when the number of half-open states - including
  * in-flight syncookies - drops far enough again
  */
- 
+
 /*
  * syncookie enabled Initial Sequence Number:
  *  24 bit MAC
@@ -297,7 +297,7 @@
 	pf_send_tcp(NULL, pd->af, pd->dst, pd->src, *pd->dport, *pd->sport,
 	    iss, ntohl(pd->hdr.tcp.th_seq) + 1, TH_SYN|TH_ACK, 0, mss,
 	    0, M_SKIP_FIREWALL | (pd->m->m_flags & M_LOOP), 0, 0,
-	    pd->act.rtableid);
+	    pd->act.rtableid, NULL);
 	counter_u64_add(V_pf_status.lcounters[KLCNT_SYNCOOKIES_SENT], 1);
 	/* XXX Maybe only in adaptive mode? */
 	atomic_add_64(&V_pf_status.syncookies_inflight[V_pf_syncookie_status.oddeven],
@@ -513,7 +513,7 @@
 	mss = pf_syncookie_msstab[cookie.flags.mss_idx];
 	wscale = pf_syncookie_wstab[cookie.flags.wscale_idx];
 
-	return (pf_build_tcp(NULL, pd->af, pd->src, pd->dst, *pd->sport,
+	return(pf_build_tcp(NULL, pd->af, pd->src, pd->dst, *pd->sport,
 	    *pd->dport, seq, 0, TH_SYN, wscale, mss, pd->ttl,
 	    (pd->m->m_flags & M_LOOP), 0, PF_MTAG_FLAG_SYNCOOKIE_RECREATED,
 	    cookie.flags.sack_ok, pd->act.rtableid));