diff --git a/sys/netpfil/pf/pf_norm.c b/sys/netpfil/pf/pf_norm.c --- a/sys/netpfil/pf/pf_norm.c +++ b/sys/netpfil/pf/pf_norm.c @@ -76,21 +76,20 @@ uint16_t fe_mff; /* more fragment flag */ }; -struct pf_fragment_cmp { - struct pf_addr frc_src; - struct pf_addr frc_dst; - uint32_t frc_id; - sa_family_t frc_af; - uint8_t frc_proto; +RB_HEAD(pf_frag_tree, pf_fragment); +struct pf_frnode { + struct pf_addr fn_src; /* ip source address */ + struct pf_addr fn_dst; /* ip destination address */ + sa_family_t fn_af; /* address family */ + u_int8_t fn_proto; /* protocol for fragments in fn_tree */ + u_int32_t fn_fragments; /* number of entries in fn_tree */ + + RB_ENTRY(pf_frnode) fn_entry; + struct pf_frag_tree fn_tree; /* matching fragments, lookup by id */ }; struct pf_fragment { - struct pf_fragment_cmp fr_key; -#define fr_src fr_key.frc_src -#define fr_dst fr_key.frc_dst -#define fr_id fr_key.frc_id -#define fr_af fr_key.frc_af -#define fr_proto fr_key.frc_proto + uint32_t fr_id; /* fragment id for reassemble */ /* pointers to queue element */ struct pf_frent *fr_firstoff[PF_FRAG_ENTRY_POINTS]; @@ -102,6 +101,7 @@ TAILQ_HEAD(pf_fragq, pf_frent) fr_queue; uint16_t fr_maxlen; /* maximum length of single fragment */ u_int16_t fr_holes; /* number of holes in the queue */ + struct pf_frnode *fr_node; /* ip src/dst/proto/af for fragments */ }; VNET_DEFINE_STATIC(struct mtx, pf_frag_mtx); @@ -114,16 +114,25 @@ VNET_DEFINE_STATIC(uma_zone_t, pf_frent_z); #define V_pf_frent_z VNET(pf_frent_z) +VNET_DEFINE_STATIC(uma_zone_t, pf_frnode_z); +#define V_pf_frnode_z VNET(pf_frnode_z) VNET_DEFINE_STATIC(uma_zone_t, pf_frag_z); #define V_pf_frag_z VNET(pf_frag_z) TAILQ_HEAD(pf_fragqueue, pf_fragment); TAILQ_HEAD(pf_cachequeue, pf_fragment); +RB_HEAD(pf_frnode_tree, pf_frnode); VNET_DEFINE_STATIC(struct pf_fragqueue, pf_fragqueue); #define V_pf_fragqueue VNET(pf_fragqueue) -RB_HEAD(pf_frag_tree, pf_fragment); VNET_DEFINE_STATIC(struct pf_frag_tree, pf_frag_tree); #define V_pf_frag_tree VNET(pf_frag_tree) +static __inline int pf_frnode_compare(struct pf_frnode *, + struct pf_frnode *); +VNET_DEFINE_STATIC(struct pf_frnode_tree, pf_frnode_tree); +#define V_pf_frnode_tree VNET(pf_frnode_tree) +RB_PROTOTYPE(pf_frnode_tree, pf_frnode, fn_entry, pf_frnode_compare); +RB_GENERATE(pf_frnode_tree, pf_frnode, fn_entry, pf_frnode_compare); + static int pf_frag_compare(struct pf_fragment *, struct pf_fragment *); static RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); @@ -134,8 +143,7 @@ static struct pf_frent *pf_create_fragment(u_short *); static int pf_frent_holes(struct pf_frent *frent); -static struct pf_fragment *pf_find_fragment(struct pf_fragment_cmp *key, - struct pf_frag_tree *tree); +static struct pf_fragment *pf_find_fragment(struct pf_frnode *, u_int32_t); static inline int pf_frent_index(struct pf_frent *); static int pf_frent_insert(struct pf_fragment *, struct pf_frent *, struct pf_frent *); @@ -143,7 +151,7 @@ struct pf_frent *); struct pf_frent *pf_frent_previous(struct pf_fragment *, struct pf_frent *); -static struct pf_fragment *pf_fillup_fragment(struct pf_fragment_cmp *, +static struct pf_fragment *pf_fillup_fragment(struct pf_frnode *, u_int32_t, struct pf_frent *, u_short *); static struct mbuf *pf_join_fragment(struct pf_fragment *); #ifdef INET @@ -163,14 +171,13 @@ #ifdef INET static void -pf_ip2key(struct ip *ip, struct pf_fragment_cmp *key) +pf_ip2key(struct ip *ip, struct pf_frnode *key) { - key->frc_src.v4 = ip->ip_src; - key->frc_dst.v4 = ip->ip_dst; - key->frc_af = AF_INET; - key->frc_proto = ip->ip_p; - key->frc_id = ip->ip_id; + key->fn_src.v4 = ip->ip_src; + key->fn_dst.v4 = ip->ip_dst; + key->fn_af = AF_INET; + key->fn_proto = ip->ip_p; } #endif /* INET */ @@ -180,6 +187,9 @@ V_pf_frag_z = uma_zcreate("pf frags", sizeof(struct pf_fragment), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + V_pf_frnode_z = uma_zcreate("pf fragment node", + sizeof(struct pf_frnode), NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, 0); V_pf_frent_z = uma_zcreate("pf frag entries", sizeof(struct pf_frent), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); V_pf_state_scrub_z = uma_zcreate("pf state scrubs", @@ -202,26 +212,36 @@ uma_zdestroy(V_pf_state_scrub_z); uma_zdestroy(V_pf_frent_z); + uma_zdestroy(V_pf_frnode_z); uma_zdestroy(V_pf_frag_z); mtx_destroy(&V_pf_frag_mtx); } static int -pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b) +pf_frnode_compare(struct pf_frnode *a, struct pf_frnode *b) { int diff; - if ((diff = a->fr_id - b->fr_id) != 0) + if ((diff = a->fn_proto - b->fn_proto) != 0) return (diff); - if ((diff = a->fr_proto - b->fr_proto) != 0) + if ((diff = a->fn_af - b->fn_af) != 0) return (diff); - if ((diff = a->fr_af - b->fr_af) != 0) + if ((diff = pf_addr_cmp(&a->fn_src, &b->fn_src, a->fn_af)) != 0) return (diff); - if ((diff = pf_addr_cmp(&a->fr_src, &b->fr_src, a->fr_af)) != 0) + if ((diff = pf_addr_cmp(&a->fn_dst, &b->fn_dst, a->fn_af)) != 0) return (diff); - if ((diff = pf_addr_cmp(&a->fr_dst, &b->fr_dst, a->fr_af)) != 0) + return (0); +} + +static __inline int +pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b) +{ + int diff; + + if ((diff = a->fr_id - b->fr_id) != 0) return (diff); + return (0); } @@ -281,10 +301,20 @@ pf_free_fragment(struct pf_fragment *frag) { struct pf_frent *frent; + struct pf_frnode *frnode; PF_FRAG_ASSERT(); - RB_REMOVE(pf_frag_tree, &V_pf_frag_tree, frag); + frnode = frag->fr_node; + RB_REMOVE(pf_frag_tree, &frnode->fn_tree, frag); + MPASS(frnode->fn_fragments >= 1); + frnode->fn_fragments--; + if (frnode->fn_fragments == 0) { + MPASS(RB_EMPTY(&frnode->fn_tree)); + RB_REMOVE(pf_frnode_tree, &V_pf_frnode_tree, frnode); + uma_zfree(V_pf_frnode_z, frnode); + } + TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next); /* Free all fragment entries */ @@ -299,17 +329,23 @@ } static struct pf_fragment * -pf_find_fragment(struct pf_fragment_cmp *key, struct pf_frag_tree *tree) +pf_find_fragment(struct pf_frnode *key, uint32_t id) { - struct pf_fragment *frag; + struct pf_fragment *frag, idkey; + struct pf_frnode *frnode; PF_FRAG_ASSERT(); - frag = RB_FIND(pf_frag_tree, tree, (struct pf_fragment *)key); - if (frag != NULL) { - TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next); - TAILQ_INSERT_HEAD(&V_pf_fragqueue, frag, frag_next); - } + frnode = RB_FIND(pf_frnode_tree, &V_pf_frnode_tree, key); + if (frnode == NULL) + return (NULL); + MPASS(frnode->fn_fragments >= 1); + idkey.fr_id = id; + frag = RB_FIND(pf_frag_tree, &frnode->fn_tree, &idkey); + if (frag == NULL) + return (NULL); + TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next); + TAILQ_INSERT_HEAD(&V_pf_fragqueue, frag, frag_next); return (frag); } @@ -527,11 +563,12 @@ } static struct pf_fragment * -pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent, - u_short *reason) +pf_fillup_fragment(struct pf_frnode *key, uint32_t id, + struct pf_frent *frent, u_short *reason) { struct pf_frent *after, *next, *prev; struct pf_fragment *frag; + struct pf_frnode *frnode; uint16_t total; PF_FRAG_ASSERT(); @@ -555,12 +592,12 @@ goto bad_fragment; } - DPFPRINTF((key->frc_af == AF_INET ? + DPFPRINTF((key->fn_af == AF_INET ? "reass frag %d @ %d-%d\n" : "reass frag %#08x @ %d-%d\n", - key->frc_id, frent->fe_off, frent->fe_off + frent->fe_len)); + id, frent->fe_off, frent->fe_off + frent->fe_len)); /* Fully buffer all of the fragments in this fragment queue. */ - frag = pf_find_fragment(key, &V_pf_frag_tree); + frag = pf_find_fragment(key, id); /* Create a new reassembly queue for this packet. */ if (frag == NULL) { @@ -574,7 +611,22 @@ } } - *(struct pf_fragment_cmp *)frag = *key; + frnode = RB_FIND(pf_frnode_tree, &V_pf_frnode_tree, key); + if (frnode == NULL) { + frnode = uma_zalloc(V_pf_frnode_z, M_NOWAIT); + if (frnode == NULL) { + pf_flush_fragments(); + frnode = uma_zalloc(V_pf_frnode_z, M_NOWAIT); + if (frnode == NULL) { + REASON_SET(reason, PFRES_MEMORY); + uma_zfree(V_pf_frag_z, frag); + goto drop_fragment; + } + } + *frnode = *key; + RB_INIT(&frnode->fn_tree); + frnode->fn_fragments = 0; + } memset(frag->fr_firstoff, 0, sizeof(frag->fr_firstoff)); memset(frag->fr_entries, 0, sizeof(frag->fr_entries)); frag->fr_timeout = time_uptime; @@ -582,7 +634,14 @@ frag->fr_maxlen = frent->fe_len; frag->fr_holes = 1; - RB_INSERT(pf_frag_tree, &V_pf_frag_tree, frag); + frag->fr_id = id; + frag->fr_node = frnode; + /* RB_INSERT cannot fail as pf_find_fragment() found nothing */ + RB_INSERT(pf_frag_tree, &frnode->fn_tree, frag); + frnode->fn_fragments++; + if (frnode->fn_fragments == 1) + RB_INSERT(pf_frnode_tree, &V_pf_frnode_tree, frnode); + TAILQ_INSERT_HEAD(&V_pf_fragqueue, frag, frag_next); /* We do not have a previous fragment, cannot fail. */ @@ -592,6 +651,7 @@ } KASSERT(!TAILQ_EMPTY(&frag->fr_queue), ("!TAILQ_EMPTY()->fr_queue")); + MPASS(frag->fr_node); /* Remember maximum fragment len for refragmentation. */ if (frent->fe_len > frag->fr_maxlen) @@ -627,7 +687,7 @@ if (prev != NULL && prev->fe_off + prev->fe_len > frent->fe_off) { uint16_t precut; - if (frag->fr_af == AF_INET6) + if (frag->fr_node->fn_af == AF_INET6) goto free_fragment; precut = prev->fe_off + prev->fe_len - frent->fe_off; @@ -681,7 +741,7 @@ return (frag); free_ipv6_fragment: - if (frag->fr_af == AF_INET) + if (frag->fr_node->fn_af == AF_INET) goto bad_fragment; free_fragment: /* @@ -743,8 +803,8 @@ struct pf_fragment *frag; struct m_tag *mtag; struct pf_fragment_tag *ftag; - struct pf_fragment_cmp key; - uint16_t total, hdrlen; + struct pf_frnode key; + uint16_t total, hdrlen; uint32_t frag_id; uint16_t maxlen; @@ -761,7 +821,7 @@ pf_ip2key(ip, &key); - if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) + if ((frag = pf_fillup_fragment(&key, ip->ip_id, frent, reason)) == NULL) return (PF_DROP); /* The mbuf is part of the fragment entry, no direct free or access */ @@ -835,7 +895,7 @@ struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct pf_frent *frent; struct pf_fragment *frag; - struct pf_fragment_cmp key; + struct pf_frnode key; struct m_tag *mtag; struct pf_fragment_tag *ftag; int off; @@ -858,14 +918,13 @@ frent->fe_off = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK); frent->fe_mff = fraghdr->ip6f_offlg & IP6F_MORE_FRAG; - key.frc_src.v6 = ip6->ip6_src; - key.frc_dst.v6 = ip6->ip6_dst; - key.frc_af = AF_INET6; + key.fn_src.v6 = ip6->ip6_src; + key.fn_dst.v6 = ip6->ip6_dst; + key.fn_af = AF_INET6; /* Only the first fragment's protocol is relevant. */ - key.frc_proto = 0; - key.frc_id = fraghdr->ip6f_ident; + key.fn_proto = 0; - if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) { + if ((frag = pf_fillup_fragment(&key, fraghdr->ip6f_ident, frent, reason)) == NULL) { PF_FRAG_UNLOCK(); return (PF_DROP); }