Index: sys/net/pfvar.h =================================================================== --- sys/net/pfvar.h +++ sys/net/pfvar.h @@ -1204,6 +1204,12 @@ #define PFFRAG_FRENT_HIWAT 5000 /* Number of fragment entries */ #define PFR_KENTRY_HIWAT 200000 /* Number of table entries */ +/* + * Limit the length of the fragment queue traversal. Remember + * search entry points based on the fragment offset. + */ +#define PF_FRAG_ENTRY_POINTS 16 + /* * ioctl parameter structures */ Index: sys/netpfil/pf/pf_norm.c =================================================================== --- sys/netpfil/pf/pf_norm.c +++ sys/netpfil/pf/pf_norm.c @@ -87,6 +87,7 @@ #define fr_af fr_key.frc_af #define fr_proto fr_key.frc_proto + struct pf_frent *fr_firstoff[PF_FRAG_ENTRY_POINTS]; RB_ENTRY(pf_fragment) fr_entry; TAILQ_ENTRY(pf_fragment) frag_next; uint32_t fr_timeout; @@ -136,6 +137,13 @@ static int pf_frent_holes(struct pf_frent *frent); static struct pf_fragment *pf_find_fragment(struct pf_fragment_cmp *key, struct pf_frag_tree *tree); +static inline int pf_frent_index(struct pf_frent *); +static void pf_frent_insert(struct pf_fragment *, + struct pf_frent *, struct pf_frent *); +void pf_frent_remove(struct pf_fragment *, + struct pf_frent *); +struct pf_frent *pf_frent_previous(struct pf_fragment *, + struct pf_frent *); static struct pf_fragment *pf_fillup_fragment(struct pf_fragment_cmp *, struct pf_frent *, u_short *); static struct mbuf *pf_join_fragment(struct pf_fragment *); @@ -308,6 +316,7 @@ { PF_FRAG_ASSERT(); + KASSERT(frag, ("frag != NULL")); RB_REMOVE(pf_frag_tree, &V_pf_frag_tree, frag); TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next); @@ -367,9 +376,150 @@ return holes; } +static inline int +pf_frent_index(struct pf_frent *frent) +{ + /* + * We have an array of 16 entry points to the queue. A full size + * 65535 octet IP packet can have 8192 fragments. So the queue + * traversal length is at most 512 and at most 16 entry points are + * checked. We need 128 additional bytes on a 64 bit architecture. + */ + CTASSERT(((u_int16_t)0xffff &~ 7) / (0x10000 / PF_FRAG_ENTRY_POINTS) == + 16 - 1); + CTASSERT(((u_int16_t)0xffff >> 3) / PF_FRAG_ENTRY_POINTS == 512 - 1); + + return frent->fe_off / (0x10000 / PF_FRAG_ENTRY_POINTS); +} + +static void +pf_frent_insert(struct pf_fragment *frag, struct pf_frent *frent, + struct pf_frent *prev) +{ + int index; + + if (prev == NULL) { + TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next); + } else { + KASSERT(prev->fe_off + prev->fe_len <= frent->fe_off, + ("overlapping fragment")); + TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next); + } + + index = pf_frent_index(frent); + if (frag->fr_firstoff[index] == NULL) { + KASSERT(prev == NULL || pf_frent_index(prev) < index, + ("prev == NULL || pf_frent_index(pref) < index")); + frag->fr_firstoff[index] = frent; + } else { + if (frent->fe_off < frag->fr_firstoff[index]->fe_off) { + KASSERT(prev == NULL || pf_frent_index(prev) < index, + ("prev == NULL || pf_frent_index(pref) < index")); + frag->fr_firstoff[index] = frent; + } else { + KASSERT(prev != NULL, ("prev != NULL")); + KASSERT(pf_frent_index(prev) == index, + ("pf_frent_index(prev) == index")); + } + } + + frag->fr_holes += pf_frent_holes(frent); +} + +void +pf_frent_remove(struct pf_fragment *frag, struct pf_frent *frent) +{ + struct pf_frent *prev = TAILQ_PREV(frent, pf_fragq, fr_next); + struct pf_frent *next = TAILQ_NEXT(frent, fr_next); + int index; + + frag->fr_holes -= pf_frent_holes(frent); + + index = pf_frent_index(frent); + KASSERT(frag->fr_firstoff[index] != NULL, ("frent not found")); + if (frag->fr_firstoff[index]->fe_off == frent->fe_off) { + if (next == NULL) { + frag->fr_firstoff[index] = NULL; + } else { + KASSERT(frent->fe_off + frent->fe_len <= next->fe_off, + ("overlapping fragment")); + if (pf_frent_index(next) == index) { + frag->fr_firstoff[index] = next; + } else { + frag->fr_firstoff[index] = NULL; + } + } + } else { + KASSERT(frag->fr_firstoff[index]->fe_off < frent->fe_off, + ("frag->fr_firstoff[index]->fe_off < frent->fe_off")); + KASSERT(prev != NULL, ("prev != NULL")); + KASSERT(prev->fe_off + prev->fe_len <= frent->fe_off, + ("overlapping fragment")); + KASSERT(pf_frent_index(prev) == index, + ("pf_frent_index(prev) == index")); + } + + TAILQ_REMOVE(&frag->fr_queue, frent, fr_next); +} + +struct pf_frent * +pf_frent_previous(struct pf_fragment *frag, struct pf_frent *frent) +{ + struct pf_frent *prev, *next; + int index; + + /* + * If there are no fragments after frag, take the final one. Assume + * that the global queue is not empty. + */ + prev = TAILQ_LAST(&frag->fr_queue, pf_fragq); + KASSERT(prev != NULL, ("prev != NULL")); + if (prev->fe_off <= frent->fe_off) + return prev; + /* + * We want to find a fragment entry that is before frag, but still + * close to it. Find the first fragment entry that is in the same + * entry point or in the first entry point after that. As we have + * already checked that there are entries behind frag, this will + * succeed. + */ + for (index = pf_frent_index(frent); index < PF_FRAG_ENTRY_POINTS; + index++) { + prev = frag->fr_firstoff[index]; + if (prev != NULL) + break; + } + KASSERT(prev != NULL, ("prev != NULL")); + /* + * In prev we may have a fragment from the same entry point that is + * before frent, or one that is just one position behind frent. + * In the latter case, we go back one step and have the predecessor. + * There may be none if the new fragment will be the first one. + */ + if (prev->fe_off > frent->fe_off) { + prev = TAILQ_PREV(prev, pf_fragq, fr_next); + if (prev == NULL) + return NULL; + KASSERT(prev->fe_off <= frent->fe_off, + ("prev->fe_off <= frent->fe_off")); + return prev; + } + /* + * In prev is the first fragment of the entry point. The offset + * of frag is behind it. Find the closest previous fragment. + */ + for (next = TAILQ_NEXT(prev, fr_next); next != NULL; + next = TAILQ_NEXT(next, fr_next)) { + if (next->fe_off > frent->fe_off) + break; + prev = next; + } + return prev; +} + static struct pf_fragment * pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent, - u_short *reason) + u_short *reason) { struct pf_frent *after, *next, *prev; struct pf_fragment *frag; @@ -416,6 +566,7 @@ } *(struct pf_fragment_cmp *)frag = *key; + memset(frag->fr_firstoff, 0, sizeof(frag->fr_firstoff)); frag->fr_timeout = time_uptime; frag->fr_maxlen = frent->fe_len; frag->fr_holes = 1; @@ -425,8 +576,7 @@ TAILQ_INSERT_HEAD(&V_pf_fragqueue, frag, frag_next); /* We do not have a previous fragment. */ - TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next); - frag->fr_holes += pf_frent_holes(frent); + pf_frent_insert(frag, frent, NULL); return (frag); } @@ -455,17 +605,15 @@ goto bad_fragment; } - /* Find a fragment after the current one. */ - prev = NULL; - TAILQ_FOREACH(after, &frag->fr_queue, fr_next) { - if (after->fe_off > frent->fe_off) - break; - prev = after; + /* Find neighbors for newly inserted fragment */ + prev = pf_frent_previous(frag, frent); + if (prev == NULL) { + after = TAILQ_FIRST(&frag->fr_queue); + KASSERT(after != NULL, ("after != NULL")); + } else { + after = TAILQ_NEXT(prev, fr_next); } - KASSERT(prev != NULL || after != NULL, - ("prev != NULL || after != NULL")); - if (prev != NULL && prev->fe_off + prev->fe_len > frent->fe_off) { uint16_t precut; @@ -493,17 +641,12 @@ /* This fragment is completely overlapped, lose it. */ next = TAILQ_NEXT(after, fr_next); - frag->fr_holes -= pf_frent_holes(after); + pf_frent_remove(frag, after); m_freem(after->fe_m); - TAILQ_REMOVE(&frag->fr_queue, after, fr_next); uma_zfree(V_pf_frent_z, after); } - if (prev == NULL) - TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next); - else - TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next); - frag->fr_holes += pf_frent_holes(frent); + pf_frent_insert(frag, frent, prev); return (frag);