Index: stable/10/sys/net/pfvar.h =================================================================== --- stable/10/sys/net/pfvar.h +++ stable/10/sys/net/pfvar.h @@ -1668,6 +1668,8 @@ int pf_match_tag(struct mbuf *, struct pf_rule *, int *, int); int pf_tag_packet(struct mbuf *, struct pf_pdesc *, int); +int pf_addr_cmp(struct pf_addr *, struct pf_addr *, + sa_family_t); void pf_qid2qname(u_int32_t, char *); VNET_DECLARE(struct pf_kstatus, pf_status); Index: stable/10/sys/netpfil/pf/pf.c =================================================================== --- stable/10/sys/netpfil/pf/pf.c +++ stable/10/sys/netpfil/pf/pf.c @@ -368,6 +368,45 @@ VNET_DEFINE(uint32_t, pf_hashseed); #define V_pf_hashseed VNET(pf_hashseed) +int +pf_addr_cmp(struct pf_addr *a, struct pf_addr *b, sa_family_t af) +{ + + switch (af) { +#ifdef INET + case AF_INET: + if (a->addr32[0] > b->addr32[0]) + return (1); + if (a->addr32[0] < b->addr32[0]) + return (-1); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (a->addr32[3] > b->addr32[3]) + return (1); + if (a->addr32[3] < b->addr32[3]) + return (-1); + if (a->addr32[2] > b->addr32[2]) + return (1); + if (a->addr32[2] < b->addr32[2]) + return (-1); + if (a->addr32[1] > b->addr32[1]) + return (1); + if (a->addr32[1] < b->addr32[1]) + return (-1); + if (a->addr32[0] > b->addr32[0]) + return (1); + if (a->addr32[0] < b->addr32[0]) + return (-1); + break; +#endif /* INET6 */ + default: + panic("%s: unknown address family %u", __func__, af); + } + return (0); +} + static __inline uint32_t pf_hashkey(struct pf_state_key *sk) { Index: stable/10/sys/netpfil/pf/pf_norm.c =================================================================== --- stable/10/sys/netpfil/pf/pf_norm.c +++ stable/10/sys/netpfil/pf/pf_norm.c @@ -1,5 +1,6 @@ /*- * Copyright 2001 Niels Provos + * Copyright 2011 Alexander Bluhm * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -48,6 +49,7 @@ #include #include #include +#include #include #include #include @@ -57,38 +59,51 @@ #endif /* INET6 */ struct pf_frent { - LIST_ENTRY(pf_frent) fr_next; - union { - struct { - struct ip *_fr_ip; - struct mbuf *_fr_m; - } _frag; - struct { - uint16_t _fr_off; - uint16_t _fr_end; - } _cache; - } _u; + TAILQ_ENTRY(pf_frent) fr_next; + struct mbuf *fe_m; + uint16_t fe_hdrlen; /* ipv4 header length with ip options + ipv6, extension, fragment header */ + uint16_t fe_extoff; /* last extension header offset or 0 */ + uint16_t fe_len; /* fragment length */ + uint16_t fe_off; /* fragment offset */ + uint16_t fe_mff; /* more fragment flag */ +}; + +struct pf_fragment_cmp { + struct pf_addr frc_src; + struct pf_addr frc_dst; + uint32_t frc_id; + sa_family_t frc_af; + uint8_t frc_proto; + uint8_t frc_direction; }; -#define fr_ip _u._frag._fr_ip -#define fr_m _u._frag._fr_m -#define fr_off _u._cache._fr_off -#define fr_end _u._cache._fr_end struct pf_fragment { + struct pf_fragment_cmp fr_key; +#define fr_src fr_key.frc_src +#define fr_dst fr_key.frc_dst +#define fr_id fr_key.frc_id +#define fr_af fr_key.frc_af +#define fr_proto fr_key.frc_proto +#define fr_direction fr_key.frc_direction + RB_ENTRY(pf_fragment) fr_entry; TAILQ_ENTRY(pf_fragment) frag_next; - struct in_addr fr_src; - struct in_addr fr_dst; - u_int8_t fr_p; /* protocol of this fragment */ - u_int8_t fr_flags; /* status flags */ -#define PFFRAG_SEENLAST 0x0001 /* Seen the last fragment for this */ -#define PFFRAG_NOBUFFER 0x0002 /* Non-buffering fragment cache */ -#define PFFRAG_DROP 0x0004 /* Drop all fragments */ + uint8_t fr_flags; /* status flags */ +#define PFFRAG_SEENLAST 0x0001 /* Seen the last fragment for this */ +#define PFFRAG_NOBUFFER 0x0002 /* Non-buffering fragment cache */ +#define PFFRAG_DROP 0x0004 /* Drop all fragments */ #define BUFFER_FRAGMENTS(fr) (!((fr)->fr_flags & PFFRAG_NOBUFFER)) - u_int16_t fr_id; /*
fragment id for reassemble */ - u_int16_t fr_max; /* fragment data max */ - u_int32_t fr_timeout; - LIST_HEAD(, pf_frent) fr_queue; + uint16_t fr_max; /* fragment data max */ + uint32_t fr_timeout; + uint16_t fr_maxlen; /* maximum length of single fragment */ + TAILQ_HEAD(pf_fragq, pf_frent) fr_queue; +}; + +struct pf_fragment_tag { + uint16_t ft_hdrlen; /* header length of reassembled pkt */ + uint16_t ft_extoff; /* last extension header offset or 0 */ + uint16_t ft_maxlen; /* maximum fragment payload length */ }; static struct mtx pf_frag_mtx; @@ -125,15 +140,25 @@ static int pf_normalize_tcpopt(struct pf_rule *, struct mbuf *, struct tcphdr *, int, sa_family_t); #ifdef INET -static void pf_ip2key(struct pf_fragment *, struct ip *); static void pf_scrub_ip(struct mbuf **, u_int32_t, u_int8_t, u_int8_t); static void pf_flush_fragments(void); -static struct pf_fragment *pf_find_fragment(struct ip *, struct pf_frag_tree *); -static struct mbuf *pf_reassemble(struct mbuf **, struct pf_fragment **, - struct pf_frent *, int); +static struct pf_fragment *pf_find_fragment(struct pf_fragment_cmp *key, + struct pf_frag_tree *tree); +struct pf_frent *pf_create_fragment(u_short *); +static int pf_reassemble(struct mbuf **, struct ip *, int, + u_short *); +int pf_reassemble6(struct mbuf **, struct ip6_hdr *, + struct ip6_frag *, uint16_t, uint16_t, int, + u_short *); static struct mbuf *pf_fragcache(struct mbuf **, struct ip*, struct pf_fragment **, int, int, int *); +static struct pf_fragment *pf_fillup_fragment(struct pf_fragment_cmp *, + struct pf_frent *, u_short *); +int pf_isfull_fragment(struct pf_fragment *); +struct mbuf *pf_join_fragment(struct pf_fragment *); + + #endif /* INET */ #ifdef INET6 static void pf_scrub_ip6(struct mbuf **, u_int8_t); @@ -145,6 +170,18 @@ } \ } while(0) +static void +pf_ip2key(struct ip *ip, int dir, struct pf_fragment_cmp *key) +{ + + key->frc_src.v4 = ip->ip_src; + key->frc_dst.v4 = ip->ip_dst; + key->frc_af = AF_INET; + key->frc_proto 
= ip->ip_p; + key->frc_id = ip->ip_id; + key->frc_direction = dir; +} + void pf_normalize_init(void) { @@ -184,18 +221,16 @@ { int diff; - if ((diff = a->fr_id - b->fr_id)) + if ((diff = a->fr_id - b->fr_id) != 0) return (diff); - else if ((diff = a->fr_p - b->fr_p)) + if ((diff = a->fr_proto - b->fr_proto) != 0) + return (diff); + if ((diff = a->fr_af - b->fr_af) != 0) + return (diff); + if ((diff = pf_addr_cmp(&a->fr_src, &b->fr_src, a->fr_af)) != 0) + return (diff); + if ((diff = pf_addr_cmp(&a->fr_dst, &b->fr_dst, a->fr_af)) != 0) return (diff); - else if (a->fr_src.s_addr < b->fr_src.s_addr) - return (-1); - else if (a->fr_src.s_addr > b->fr_src.s_addr) - return (1); - else if (a->fr_dst.s_addr < b->fr_dst.s_addr) - return (-1); - else if (a->fr_dst.s_addr > b->fr_dst.s_addr) - return (1); return (0); } @@ -270,23 +305,23 @@ /* Free all fragments */ if (BUFFER_FRAGMENTS(frag)) { - for (frent = LIST_FIRST(&frag->fr_queue); frent; - frent = LIST_FIRST(&frag->fr_queue)) { - LIST_REMOVE(frent, fr_next); + for (frent = TAILQ_FIRST(&frag->fr_queue); frent; + frent = TAILQ_FIRST(&frag->fr_queue)) { + TAILQ_REMOVE(&frag->fr_queue, frent, fr_next); - m_freem(frent->fr_m); + m_freem(frent->fe_m); uma_zfree(V_pf_frent_z, frent); } } else { - for (frent = LIST_FIRST(&frag->fr_queue); frent; - frent = LIST_FIRST(&frag->fr_queue)) { - LIST_REMOVE(frent, fr_next); - - KASSERT((LIST_EMPTY(&frag->fr_queue) || - LIST_FIRST(&frag->fr_queue)->fr_off > - frent->fr_end), - ("! (LIST_EMPTY() || LIST_FIRST()->fr_off >" - " frent->fr_end): %s", __func__)); + for (frent = TAILQ_FIRST(&frag->fr_queue); frent; + frent = TAILQ_FIRST(&frag->fr_queue)) { + TAILQ_REMOVE(&frag->fr_queue, frent, fr_next); + + KASSERT((TAILQ_EMPTY(&frag->fr_queue) || + TAILQ_FIRST(&frag->fr_queue)->fe_off > + frent->fe_len), + ("! 
(TAILQ_EMPTY() || TAILQ_FIRST()->fe_off >" + " frent->fe_len): %s", __func__)); uma_zfree(V_pf_frent_z, frent); } @@ -296,26 +331,14 @@ } #ifdef INET -static void -pf_ip2key(struct pf_fragment *key, struct ip *ip) -{ - key->fr_p = ip->ip_p; - key->fr_id = ip->ip_id; - key->fr_src.s_addr = ip->ip_src.s_addr; - key->fr_dst.s_addr = ip->ip_dst.s_addr; -} - static struct pf_fragment * -pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree) +pf_find_fragment(struct pf_fragment_cmp *key, struct pf_frag_tree *tree) { - struct pf_fragment key; struct pf_fragment *frag; PF_FRAG_ASSERT(); - pf_ip2key(&key, ip); - - frag = RB_FIND(pf_frag_tree, tree, &key); + frag = RB_FIND(pf_frag_tree, tree, (struct pf_fragment *)key); if (frag != NULL) { /* XXX Are we sure we want to update the timeout? */ frag->fr_timeout = time_uptime; @@ -352,210 +375,412 @@ } #ifdef INET -#define FR_IP_OFF(fr) ((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3) -static struct mbuf * -pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, - struct pf_frent *frent, int mff) +struct pf_frent * +pf_create_fragment(u_short *reason) { - struct mbuf *m = *m0, *m2; - struct pf_frent *frea, *next; - struct pf_frent *frep = NULL; - struct ip *ip = frent->fr_ip; - int hlen = ip->ip_hl << 2; - u_int16_t off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3; - u_int16_t ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4; - u_int16_t max = ip_len + off; + struct pf_frent *frent; PF_FRAG_ASSERT(); - KASSERT((*frag == NULL || BUFFER_FRAGMENTS(*frag)), - ("! 
(*frag == NULL || BUFFER_FRAGMENTS(*frag)): %s", __FUNCTION__)); - /* Strip off ip header */ - m->m_data += hlen; - m->m_len -= hlen; + frent = uma_zalloc(V_pf_frent_z, M_NOWAIT); + if (frent == NULL) { + pf_flush_fragments(); + frent = uma_zalloc(V_pf_frent_z, M_NOWAIT); + if (frent == NULL) { + REASON_SET(reason, PFRES_MEMORY); + return (NULL); + } + } + + return (frent); +} - /* Create a new reassembly queue for this packet */ - if (*frag == NULL) { - *frag = uma_zalloc(V_pf_frag_z, M_NOWAIT); - if (*frag == NULL) { +struct pf_fragment * +pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent, + u_short *reason) +{ + struct pf_frent *after, *next, *prev; + struct pf_fragment *frag; + uint16_t total; + + PF_FRAG_ASSERT(); + + /* No empty fragments. */ + if (frent->fe_len == 0) { + DPFPRINTF(("bad fragment: len 0")); + goto bad_fragment; + } + + /* All fragments are 8 byte aligned. */ + if (frent->fe_mff && (frent->fe_len & 0x7)) { + DPFPRINTF(("bad fragment: mff and len %d", frent->fe_len)); + goto bad_fragment; + } + + /* Respect maximum length, IP_MAXPACKET == IPV6_MAXPACKET. */ + if (frent->fe_off + frent->fe_len > IP_MAXPACKET) { + DPFPRINTF(("bad fragment: max packet %d", + frent->fe_off + frent->fe_len)); + goto bad_fragment; + } + + DPFPRINTF((key->frc_af == AF_INET ? + "reass frag %d @ %d-%d" : "reass frag %#08x @ %d-%d", + key->frc_id, frent->fe_off, frent->fe_off + frent->fe_len)); + + /* Fully buffer all of the fragments in this fragment queue. */ + frag = pf_find_fragment(key, &V_pf_frag_tree); + + /* Create a new reassembly queue for this packet. 
*/ + if (frag == NULL) { + frag = uma_zalloc(V_pf_frag_z, M_NOWAIT); + if (frag == NULL) { pf_flush_fragments(); - *frag = uma_zalloc(V_pf_frag_z, M_NOWAIT); - if (*frag == NULL) + frag = uma_zalloc(V_pf_frag_z, M_NOWAIT); + if (frag == NULL) { + REASON_SET(reason, PFRES_MEMORY); goto drop_fragment; + } } - (*frag)->fr_flags = 0; - (*frag)->fr_max = 0; - (*frag)->fr_src = frent->fr_ip->ip_src; - (*frag)->fr_dst = frent->fr_ip->ip_dst; - (*frag)->fr_p = frent->fr_ip->ip_p; - (*frag)->fr_id = frent->fr_ip->ip_id; - (*frag)->fr_timeout = time_uptime; - LIST_INIT(&(*frag)->fr_queue); + *(struct pf_fragment_cmp *)frag = *key; + frag->fr_timeout = time_uptime; /* cf. pf_find_fragment() */ + frag->fr_maxlen = frent->fe_len; + TAILQ_INIT(&frag->fr_queue); + + RB_INSERT(pf_frag_tree, &V_pf_frag_tree, frag); + TAILQ_INSERT_HEAD(&V_pf_fragqueue, frag, frag_next); - RB_INSERT(pf_frag_tree, &V_pf_frag_tree, *frag); - TAILQ_INSERT_HEAD(&V_pf_fragqueue, *frag, frag_next); + /* We do not have a previous fragment. */ + TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next); - /* We do not have a previous fragment */ - frep = NULL; - goto insert; + return (frag); } - /* - * Find a fragment after the current one: - * - off contains the real shifted offset. - */ - LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) { - if (FR_IP_OFF(frea) > off) + KASSERT(!TAILQ_EMPTY(&frag->fr_queue), ("!TAILQ_EMPTY()->fr_queue")); + + /* Remember maximum fragment len for refragmentation. */ + if (frent->fe_len > frag->fr_maxlen) + frag->fr_maxlen = frent->fe_len; + + /* Maximum data we have seen already. */ + total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + + TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; + + /* Non terminal fragments must have more fragments flag. */ + if (frent->fe_off + frent->fe_len < total && !frent->fe_mff) + goto bad_fragment; + + /* Check if we saw the last fragment already.
*/ + if (!TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff) { + if (frent->fe_off + frent->fe_len > total || + (frent->fe_off + frent->fe_len == total && frent->fe_mff)) + goto bad_fragment; + } else { + if (frent->fe_off + frent->fe_len == total && !frent->fe_mff) + goto bad_fragment; + } + + /* Find a fragment after the current one. */ + prev = NULL; + TAILQ_FOREACH(after, &frag->fr_queue, fr_next) { + if (after->fe_off > frent->fe_off) break; - frep = frea; + prev = after; } - KASSERT((frep != NULL || frea != NULL), - ("!(frep != NULL || frea != NULL): %s", __FUNCTION__));; + KASSERT(prev != NULL || after != NULL, + ("prev != NULL || after != NULL")); - if (frep != NULL && - FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * - 4 > off) - { - u_int16_t precut; + if (prev != NULL && prev->fe_off + prev->fe_len > frent->fe_off) { + uint16_t precut; - precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - - frep->fr_ip->ip_hl * 4 - off; - if (precut >= ip_len) - goto drop_fragment; - m_adj(frent->fr_m, precut); - DPFPRINTF(("overlap -%d\n", precut)); - /* Enforce 8 byte boundaries */ - ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3)); - off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3; - ip_len -= precut; - ip->ip_len = htons(ip_len); - } - - for (; frea != NULL && ip_len + off > FR_IP_OFF(frea); - frea = next) - { - u_int16_t aftercut; - - aftercut = ip_len + off - FR_IP_OFF(frea); - DPFPRINTF(("adjust overlap %d\n", aftercut)); - if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl - * 4) - { - frea->fr_ip->ip_len = - htons(ntohs(frea->fr_ip->ip_len) - aftercut); - frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) + - (aftercut >> 3)); - m_adj(frea->fr_m, aftercut); + precut = prev->fe_off + prev->fe_len - frent->fe_off; + if (precut >= frent->fe_len) + goto bad_fragment; + DPFPRINTF(("overlap -%d", precut)); + m_adj(frent->fe_m, precut); + frent->fe_off += precut; + frent->fe_len -= precut; + } + + for (; after != NULL && 
frent->fe_off + frent->fe_len > after->fe_off; + after = next) { + uint16_t aftercut; + + aftercut = frent->fe_off + frent->fe_len - after->fe_off; + DPFPRINTF(("adjust overlap %d", aftercut)); + if (aftercut < after->fe_len) { + m_adj(after->fe_m, aftercut); + after->fe_off += aftercut; + after->fe_len -= aftercut; break; } - /* This fragment is completely overlapped, lose it */ - next = LIST_NEXT(frea, fr_next); - m_freem(frea->fr_m); - LIST_REMOVE(frea, fr_next); - uma_zfree(V_pf_frent_z, frea); + /* This fragment is completely overlapped, lose it. */ + next = TAILQ_NEXT(after, fr_next); + m_freem(after->fe_m); + TAILQ_REMOVE(&frag->fr_queue, after, fr_next); + uma_zfree(V_pf_frent_z, after); } - insert: - /* Update maximum data size */ - if ((*frag)->fr_max < max) - (*frag)->fr_max = max; - /* This is the last segment */ - if (!mff) - (*frag)->fr_flags |= PFFRAG_SEENLAST; - - if (frep == NULL) - LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next); + if (prev == NULL) + TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next); else - LIST_INSERT_AFTER(frep, frent, fr_next); + TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next); + + return (frag); + +bad_fragment: + REASON_SET(reason, PFRES_FRAG); +drop_fragment: + uma_zfree(V_pf_frent_z, frent); + return (NULL); +} + +int +pf_isfull_fragment(struct pf_fragment *frag) +{ + struct pf_frent *frent, *next; + uint16_t off, total; /* Check if we are completely reassembled */ - if (!((*frag)->fr_flags & PFFRAG_SEENLAST)) - return (NULL); + if (TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff) + return (0); + + /* Maximum data we have seen already */ + total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + + TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; /* Check if we have all the data */ off = 0; - for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) { - next = LIST_NEXT(frep, fr_next); + for (frent = TAILQ_FIRST(&frag->fr_queue); frent; frent = next) { + next = TAILQ_NEXT(frent, fr_next); - off += 
ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4; - if (off < (*frag)->fr_max && - (next == NULL || FR_IP_OFF(next) != off)) - { - DPFPRINTF(("missing fragment at %d, next %d, max %d\n", - off, next == NULL ? -1 : FR_IP_OFF(next), - (*frag)->fr_max)); - return (NULL); + off += frent->fe_len; + if (off < total && (next == NULL || next->fe_off != off)) { + DPFPRINTF(("missing fragment at %d, next %d, total %d", + off, next == NULL ? -1 : next->fe_off, total)); + return (0); } } - DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max)); - if (off < (*frag)->fr_max) - return (NULL); + DPFPRINTF(("%d < %d?", off, total)); + if (off < total) + return (0); + KASSERT(off == total, ("off == total")); - /* We have all the data */ - frent = LIST_FIRST(&(*frag)->fr_queue); - KASSERT((frent != NULL), ("frent == NULL: %s", __FUNCTION__)); - if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) { - DPFPRINTF(("drop: too big: %d\n", off)); - pf_free_fragment(*frag); - *frag = NULL; - return (NULL); - } - next = LIST_NEXT(frent, fr_next); + return (1); +} - /* Magic from ip_input */ - ip = frent->fr_ip; - m = frent->fr_m; +struct mbuf * +pf_join_fragment(struct pf_fragment *frag) +{ + struct mbuf *m, *m2; + struct pf_frent *frent, *next; + + frent = TAILQ_FIRST(&frag->fr_queue); + next = TAILQ_NEXT(frent, fr_next); + + /* Magic from ip_input. */ + m = frent->fe_m; m2 = m->m_next; m->m_next = NULL; m_cat(m, m2); uma_zfree(V_pf_frent_z, frent); for (frent = next; frent != NULL; frent = next) { - next = LIST_NEXT(frent, fr_next); + next = TAILQ_NEXT(frent, fr_next); - m2 = frent->fr_m; + m2 = frent->fe_m; + /* Strip off ip header. 
*/ + m_adj(m2, frent->fe_hdrlen); uma_zfree(V_pf_frent_z, frent); - m->m_pkthdr.csum_flags &= m2->m_pkthdr.csum_flags; - m->m_pkthdr.csum_data += m2->m_pkthdr.csum_data; m_cat(m, m2); } - while (m->m_pkthdr.csum_data & 0xffff0000) - m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) + - (m->m_pkthdr.csum_data >> 16); - ip->ip_src = (*frag)->fr_src; - ip->ip_dst = (*frag)->fr_dst; - - /* Remove from fragment queue */ - pf_remove_fragment(*frag); - *frag = NULL; - - hlen = ip->ip_hl << 2; - ip->ip_len = htons(off + hlen); - m->m_len += hlen; - m->m_data -= hlen; + /* Remove from fragment queue. */ + pf_remove_fragment(frag); + + return (m); +} + +#define FR_IP_OFF(fr) ((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3) +static int +pf_reassemble(struct mbuf **m0, struct ip *ip, int dir, u_short *reason) +{ + struct mbuf *m = *m0; + struct pf_frent *frent; + struct pf_fragment *frag; + struct pf_fragment_cmp key; + uint16_t total, hdrlen; + + /* Get an entry for the fragment queue */ + if ((frent = pf_create_fragment(reason)) == NULL) + return (PF_DROP); + + frent->fe_m = m; + frent->fe_hdrlen = ip->ip_hl << 2; + frent->fe_extoff = 0; + frent->fe_len = ntohs(ip->ip_len) - (ip->ip_hl << 2); + frent->fe_off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3; + frent->fe_mff = ntohs(ip->ip_off) & IP_MF; + + pf_ip2key(ip, dir, &key); + + if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) + return (PF_DROP); + + /* The mbuf is part of the fragment entry, no direct free or access */ + m = *m0 = NULL; + + if (!pf_isfull_fragment(frag)) + return (PF_PASS); /* drop because *m0 is NULL, no error */ + + /* We have all the data */ + frent = TAILQ_FIRST(&frag->fr_queue); + KASSERT(frent != NULL, ("frent != NULL")); + total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + + TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; + hdrlen = frent->fe_hdrlen; + + m = *m0 = pf_join_fragment(frag); + frag = NULL; - /* some debugging cruft by sklower, below, will go away soon */ - /* XXX 
this should be done elsewhere */ if (m->m_flags & M_PKTHDR) { int plen = 0; - for (m2 = m; m2; m2 = m2->m_next) - plen += m2->m_len; + for (m = *m0; m; m = m->m_next) + plen += m->m_len; + m = *m0; m->m_pkthdr.len = plen; } + ip = mtod(m, struct ip *); + ip->ip_len = htons(hdrlen + total); + ip->ip_off &= ~(IP_MF|IP_OFFMASK); + + if (hdrlen + total > IP_MAXPACKET) { + DPFPRINTF(("drop: too big: %d", total)); + ip->ip_len = 0; + REASON_SET(reason, PFRES_SHORT); + /* PF_DROP requires a valid mbuf *m0 in pf_test() */ + return (PF_DROP); + } + DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len))); - return (m); + return (PF_PASS); +} - drop_fragment: - /* Oops - fail safe - drop packet */ - uma_zfree(V_pf_frent_z, frent); - m_freem(m); - return (NULL); +#ifdef INET6 +int +pf_reassemble6(struct mbuf **m0, struct ip6_hdr *ip6, struct ip6_frag *fraghdr, + uint16_t hdrlen, uint16_t extoff, int dir, u_short *reason) +{ + struct mbuf *m = *m0; + struct pf_frent *frent; + struct pf_fragment *frag; + struct pf_fragment_cmp key; + int off; + uint16_t total, maxlen; + uint8_t proto; + + PF_FRAG_LOCK(); + + /* Get an entry for the fragment queue. */ + if ((frent = pf_create_fragment(reason)) == NULL) { + PF_FRAG_UNLOCK(); + return (PF_DROP); + } + + frent->fe_m = m; + frent->fe_hdrlen = hdrlen; + frent->fe_extoff = extoff; + frent->fe_len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - hdrlen; + frent->fe_off = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK); + frent->fe_mff = fraghdr->ip6f_offlg & IP6F_MORE_FRAG; + + key.frc_src.v6 = ip6->ip6_src; + key.frc_dst.v6 = ip6->ip6_dst; + key.frc_af = AF_INET6; + /* Only the first fragment's protocol is relevant. */ + key.frc_proto = 0; + key.frc_id = fraghdr->ip6f_ident; + key.frc_direction = dir; + + if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) { + PF_FRAG_UNLOCK(); + return (PF_DROP); + } + + /* The mbuf is part of the fragment entry, no direct free or access. 
*/ + m = *m0 = NULL; + + if (!pf_isfull_fragment(frag)) { + PF_FRAG_UNLOCK(); + return (PF_PASS); /* Drop because *m0 is NULL, no error. */ + } + + /* We have all the data. */ + extoff = frent->fe_extoff; + maxlen = frag->fr_maxlen; + frent = TAILQ_FIRST(&frag->fr_queue); + KASSERT(frent != NULL, ("frent != NULL")); + total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + + TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; + hdrlen = frent->fe_hdrlen - sizeof(struct ip6_frag); + + m = *m0 = pf_join_fragment(frag); + frag = NULL; + + PF_FRAG_UNLOCK(); + + /* Take protocol from first fragment header. */ + m = m_getptr(m, hdrlen + offsetof(struct ip6_frag, ip6f_nxt), &off); + KASSERT(m, ("%s: short mbuf chain", __func__)); + proto = *(mtod(m, caddr_t) + off); + m = *m0; + + /* Delete frag6 header */ + if (ip6_deletefraghdr(m, hdrlen, M_NOWAIT) != 0) + goto fail; + + if (m->m_flags & M_PKTHDR) { + int plen = 0; + for (m = *m0; m; m = m->m_next) + plen += m->m_len; + m = *m0; + m->m_pkthdr.len = plen; + } + + ip6 = mtod(m, struct ip6_hdr *); + ip6->ip6_plen = htons(hdrlen - sizeof(struct ip6_hdr) + total); + if (extoff) { + /* Write protocol into next field of last extension header. */ + m = m_getptr(m, extoff + offsetof(struct ip6_ext, ip6e_nxt), + &off); + KASSERT(m, ("%s: short mbuf chain", __func__)); + *(mtod(m, char *) + off) = proto; + m = *m0; + } else + ip6->ip6_nxt = proto; + + if (hdrlen - sizeof(struct ip6_hdr) + total > IPV6_MAXPACKET) { + DPFPRINTF(("drop: too big: %d", total)); + ip6->ip6_plen = 0; + REASON_SET(reason, PFRES_SHORT); + /* PF_DROP requires a valid mbuf *m0 in pf_test6(). */ + return (PF_DROP); + } + + DPFPRINTF(("complete: %p(%d)", m, ntohs(ip6->ip6_plen))); + return (PF_PASS); + +fail: + REASON_SET(reason, PFRES_MEMORY); + /* PF_DROP requires a valid mbuf *m0 in pf_test6(), will free later. 
*/ + return (PF_DROP); } +#endif + static struct mbuf * pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, int drop, int *nomem) @@ -591,16 +816,15 @@ (*frag)->fr_flags = PFFRAG_NOBUFFER; (*frag)->fr_max = 0; - (*frag)->fr_src = h->ip_src; - (*frag)->fr_dst = h->ip_dst; - (*frag)->fr_p = h->ip_p; + (*frag)->fr_src.v4 = h->ip_src; + (*frag)->fr_dst.v4 = h->ip_dst; (*frag)->fr_id = h->ip_id; (*frag)->fr_timeout = time_uptime; - cur->fr_off = off; - cur->fr_end = max; - LIST_INIT(&(*frag)->fr_queue); - LIST_INSERT_HEAD(&(*frag)->fr_queue, cur, fr_next); + cur->fe_off = off; + cur->fe_len = max; /* TODO: fe_len = max - off ? */ + TAILQ_INIT(&(*frag)->fr_queue); + TAILQ_INSERT_HEAD(&(*frag)->fr_queue, cur, fr_next); RB_INSERT(pf_frag_tree, &V_pf_cache_tree, *frag); TAILQ_INSERT_HEAD(&V_pf_cachequeue, *frag, frag_next); @@ -615,8 +839,8 @@ * - off contains the real shifted offset. */ frp = NULL; - LIST_FOREACH(fra, &(*frag)->fr_queue, fr_next) { - if (fra->fr_off > off) + TAILQ_FOREACH(fra, &(*frag)->fr_queue, fr_next) { + if (fra->fe_off > off) break; frp = fra; } @@ -627,18 +851,18 @@ if (frp != NULL) { int precut; - precut = frp->fr_end - off; + precut = frp->fe_len - off; if (precut >= ip_len) { /* Fragment is entirely a duplicate */ DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n", - h->ip_id, frp->fr_off, frp->fr_end, off, max)); + h->ip_id, frp->fe_off, frp->fe_len, off, max)); goto drop_fragment; } if (precut == 0) { /* They are adjacent. Fixup cache entry */ DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n", - h->ip_id, frp->fr_off, frp->fr_end, off, max)); - frp->fr_end = max; + h->ip_id, frp->fe_off, frp->fe_len, off, max)); + frp->fe_len = max; } else if (precut > 0) { /* The first part of this payload overlaps with a * fragment that has already been passed. 
@@ -648,13 +872,13 @@ */ DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n", - h->ip_id, precut, frp->fr_off, frp->fr_end, off, + h->ip_id, precut, frp->fe_off, frp->fe_len, off, max)); off += precut; max -= precut; /* Update the previous frag to encompass this one */ - frp->fr_end = max; + frp->fe_len = max; if (!drop) { /* XXX Optimization opportunity @@ -702,16 +926,16 @@ /* There is a gap between fragments */ DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n", - h->ip_id, -precut, frp->fr_off, frp->fr_end, off, + h->ip_id, -precut, frp->fe_off, frp->fe_len, off, max)); cur = uma_zalloc(V_pf_frent_z, M_NOWAIT); if (cur == NULL) goto no_mem; - cur->fr_off = off; - cur->fr_end = max; - LIST_INSERT_AFTER(frp, cur, fr_next); + cur->fe_off = off; + cur->fe_len = max; + TAILQ_INSERT_AFTER(&(*frag)->fr_queue, frp, cur, fr_next); } } @@ -719,19 +943,19 @@ int aftercut; int merge = 0; - aftercut = max - fra->fr_off; + aftercut = max - fra->fe_off; if (aftercut == 0) { /* Adjacent fragments */ DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n", - h->ip_id, off, max, fra->fr_off, fra->fr_end)); - fra->fr_off = off; + h->ip_id, off, max, fra->fe_off, fra->fe_len)); + fra->fe_off = off; merge = 1; } else if (aftercut > 0) { /* Need to chop off the tail of this fragment */ DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n", - h->ip_id, aftercut, off, max, fra->fr_off, - fra->fr_end)); - fra->fr_off = off; + h->ip_id, aftercut, off, max, fra->fe_off, + fra->fe_len)); + fra->fe_off = off; max -= aftercut; merge = 1; @@ -756,42 +980,42 @@ } else if (frp == NULL) { /* There is a gap between fragments */ DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n", - h->ip_id, -aftercut, off, max, fra->fr_off, - fra->fr_end)); + h->ip_id, -aftercut, off, max, fra->fe_off, + fra->fe_len)); cur = uma_zalloc(V_pf_frent_z, M_NOWAIT); if (cur == NULL) goto no_mem; - cur->fr_off = off; - cur->fr_end = max; - LIST_INSERT_BEFORE(fra, cur, fr_next); + cur->fe_off = off; + cur->fe_len = max; + 
TAILQ_INSERT_HEAD(&(*frag)->fr_queue, cur, fr_next); } /* Need to glue together two separate fragment descriptors */ if (merge) { - if (cur && fra->fr_off <= cur->fr_end) { + if (cur && fra->fe_off <= cur->fe_len) { /* Need to merge in a previous 'cur' */ DPFPRINTF(("fragcache[%d]: adjacent(merge " "%d-%d) %d-%d (%d-%d)\n", - h->ip_id, cur->fr_off, cur->fr_end, off, - max, fra->fr_off, fra->fr_end)); - fra->fr_off = cur->fr_off; - LIST_REMOVE(cur, fr_next); + h->ip_id, cur->fe_off, cur->fe_len, off, + max, fra->fe_off, fra->fe_len)); + fra->fe_off = cur->fe_off; + TAILQ_REMOVE(&(*frag)->fr_queue, cur, fr_next); uma_zfree(V_pf_frent_z, cur); cur = NULL; - } else if (frp && fra->fr_off <= frp->fr_end) { + } else if (frp && fra->fe_off <= frp->fe_len) { /* Need to merge in a modified 'frp' */ KASSERT((cur == NULL), ("cur != NULL: %s", __FUNCTION__)); DPFPRINTF(("fragcache[%d]: adjacent(merge " "%d-%d) %d-%d (%d-%d)\n", - h->ip_id, frp->fr_off, frp->fr_end, off, - max, fra->fr_off, fra->fr_end)); - fra->fr_off = frp->fr_off; - LIST_REMOVE(frp, fr_next); + h->ip_id, frp->fe_off, frp->fe_len, off, + max, fra->fe_off, fra->fe_len)); + fra->fe_off = frp->fe_off; + TAILQ_REMOVE(&(*frag)->fr_queue, frp, fr_next); uma_zfree(V_pf_frent_z, frp); frp = NULL; @@ -820,8 +1044,8 @@ /* Check if we are completely reassembled */ if (((*frag)->fr_flags & PFFRAG_SEENLAST) && - LIST_FIRST(&(*frag)->fr_queue)->fr_off == 0 && - LIST_FIRST(&(*frag)->fr_queue)->fr_end == (*frag)->fr_max) { + TAILQ_FIRST(&(*frag)->fr_queue)->fe_off == 0 && + TAILQ_FIRST(&(*frag)->fr_queue)->fe_len == (*frag)->fr_max) { /* Remove from fragment queue */ DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id, (*frag)->fr_max)); @@ -865,8 +1089,8 @@ { struct mbuf *m = *m0; struct pf_rule *r; - struct pf_frent *frent; struct pf_fragment *frag = NULL; + struct pf_fragment_cmp key; struct ip *h = mtod(m, struct ip *); int mff = (ntohs(h->ip_off) & IP_MF); int hlen = h->ip_hl << 2; @@ -875,6 +1099,7 @@ int ip_len; int 
ip_off; int tag = -1; + int verdict; PF_RULES_RASSERT(); @@ -959,28 +1184,24 @@ /* Fully buffer all of the fragments */ PF_FRAG_LOCK(); - frag = pf_find_fragment(h, &V_pf_frag_tree); + + pf_ip2key(h, dir, &key); + frag = pf_find_fragment(&key, &V_pf_frag_tree); /* Check if we saw the last fragment already */ if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) && max > frag->fr_max) goto bad; - /* Get an entry for the fragment queue */ - frent = uma_zalloc(V_pf_frent_z, M_NOWAIT); - if (frent == NULL) { - PF_FRAG_UNLOCK(); - REASON_SET(reason, PFRES_MEMORY); - return (PF_DROP); - } - frent->fr_ip = h; - frent->fr_m = m; - /* Might return a completely reassembled mbuf, or NULL */ DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, max)); - *m0 = m = pf_reassemble(m0, &frag, frent, mff); + verdict = pf_reassemble(m0, h, dir, reason); PF_FRAG_UNLOCK(); + if (verdict != PF_PASS) + return (PF_DROP); + + m = *m0; if (m == NULL) return (PF_DROP); @@ -1014,7 +1235,8 @@ } PF_FRAG_LOCK(); - frag = pf_find_fragment(h, &V_pf_cache_tree); + pf_ip2key(h, dir, &key); + frag = pf_find_fragment(&key, &V_pf_cache_tree); /* Check if we saw the last fragment already */ if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) && @@ -1111,13 +1333,13 @@ struct mbuf *m = *m0; struct pf_rule *r; struct ip6_hdr *h = mtod(m, struct ip6_hdr *); + int extoff; int off; struct ip6_ext ext; struct ip6_opt opt; struct ip6_opt_jumbo jumbo; struct ip6_frag frag; u_int32_t jumbolen = 0, plen; - u_int16_t fragoff = 0; int optend; int ooff; u_int8_t proto; @@ -1161,6 +1383,7 @@ if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len) goto drop; + extoff = 0; off = sizeof(struct ip6_hdr); proto = h->ip6_nxt; terminal = 0; @@ -1175,6 +1398,7 @@ if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL, NULL, AF_INET6)) goto shortpkt; + extoff = off; if (proto == IPPROTO_AH) off += (ext.ip6e_len + 2) * 4; else @@ -1185,6 +1409,7 @@ if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL, NULL, AF_INET6)) goto 
shortpkt; + extoff = off; optend = off + (ext.ip6e_len + 1) * 8; ooff = off + sizeof(ext); do { @@ -1248,18 +1473,27 @@ return (PF_PASS); fragment: - if (ntohs(h->ip6_plen) == 0 || jumbolen) - goto drop; + /* Jumbo payload packets cannot be fragmented. */ plen = ntohs(h->ip6_plen); + if (plen == 0 || jumbolen) + goto drop; + if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len) + goto shortpkt; if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, NULL, AF_INET6)) goto shortpkt; - fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK); - if (fragoff + (plen - off - sizeof(frag)) > IPV6_MAXPACKET) - goto badfrag; - /* do something about it */ - /* remember to set pd->flags |= PFDESC_IP_REAS */ + /* Offset now points to data portion. */ + off += sizeof(frag); + + /* Returns PF_DROP or *m0 is NULL or completely reassembled mbuf. */ + if (pf_reassemble6(m0, h, &frag, off, extoff, dir, reason) != PF_PASS) + return (PF_DROP); + m = *m0; + if (m == NULL) + return (PF_DROP); + + pd->flags |= PFDESC_IP_REAS; return (PF_PASS); shortpkt: @@ -1275,13 +1509,6 @@ PFLOG_PACKET(kif, m, AF_INET6, dir, *reason, r, NULL, NULL, pd, 1); return (PF_DROP); - - badfrag: - REASON_SET(reason, PFRES_FRAG); - if (r != NULL && r->log) - PFLOG_PACKET(kif, m, AF_INET6, dir, *reason, r, NULL, NULL, pd, - 1); - return (PF_DROP); } #endif /* INET6 */