Index: head/sys/net/radix.c
===================================================================
--- head/sys/net/radix.c	(revision 272360)
+++ head/sys/net/radix.c	(revision 272361)
@@ -1,1198 +1,1195 @@
 /*-
  * Copyright (c) 1988, 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)radix.c	8.5 (Berkeley) 5/19/95
  * $FreeBSD$
  */
 
 /*
  * Routines to build and maintain radix trees for routing lookups.
  */
 #include <sys/param.h>
 #ifdef	_KERNEL
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/rwlock.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/syslog.h>
 #include <net/radix.h>
 #include "opt_mpath.h"
 #ifdef RADIX_MPATH
 #include <net/radix_mpath.h>
 #endif
 #else /* !_KERNEL */
 #include <stdio.h>
 #include <strings.h>
 #include <stdlib.h>
 #define log(x, arg...)	fprintf(stderr, ## arg)
 #define panic(x)	fprintf(stderr, "PANIC: %s", x), exit(1)
 #define min(a, b) ((a) < (b) ? (a) : (b) )
 #include <net/radix.h>
 #endif /* !_KERNEL */
 
 static int	rn_walktree_from(struct radix_node_head *h, void *a, void *m,
 		    walktree_f_t *f, void *w);
 static int rn_walktree(struct radix_node_head *, walktree_f_t *, void *);
 static struct radix_node
 	 *rn_insert(void *, struct radix_node_head *, int *,
 	     struct radix_node [2]),
 	 *rn_newpair(void *, int, struct radix_node[2]),
 	 *rn_search(void *, struct radix_node *),
 	 *rn_search_m(void *, struct radix_node *, void *);
 
 static void rn_detachhead_internal(void **head);
 static int rn_inithead_internal(void **head, int off);
 
 #define	RADIX_MAX_KEY_LEN	32
 
 static char rn_zeros[RADIX_MAX_KEY_LEN];
 static char rn_ones[RADIX_MAX_KEY_LEN] = {
 	-1, -1, -1, -1, -1, -1, -1, -1,
 	-1, -1, -1, -1, -1, -1, -1, -1,
 	-1, -1, -1, -1, -1, -1, -1, -1,
 	-1, -1, -1, -1, -1, -1, -1, -1,
 };
 
 
 static int	rn_lexobetter(void *m_arg, void *n_arg);
 static struct radix_mask *
 		rn_new_radix_mask(struct radix_node *tt,
 		    struct radix_mask *next);
 static int	rn_satisfies_leaf(char *trial, struct radix_node *leaf,
 		    int skip);
 
 /*
  * The data structure for the keys is a radix tree with one way
  * branching removed.  The index rn_bit at an internal node n represents a bit
  * position to be tested.  The tree is arranged so that all descendants
  * of a node n have keys whose bits all agree up to position rn_bit - 1.
  * (We say the index of n is rn_bit.)
  *
  * There is at least one descendant which has a one bit at position rn_bit,
  * and at least one with a zero there.
  *
  * A route is determined by a pair of key and mask.  We require that the
  * bit-wise logical and of the key and mask to be the key.
  * We define the index of a route to associated with the mask to be
  * the first bit number in the mask where 0 occurs (with bit number 0
  * representing the highest order bit).
  *
  * We say a mask is normal if every bit is 0, past the index of the mask.
  * If a node n has a descendant (k, m) with index(m) == index(n) == rn_bit,
  * and m is a normal mask, then the route applies to every descendant of n.
  * If the index(m) < rn_bit, this implies the trailing last few bits of k
  * before bit b are all 0, (and hence consequently true of every descendant
  * of n), so the route applies to all descendants of the node as well.
  *
  * Similar logic shows that a non-normal mask m such that
  * index(m) <= index(n) could potentially apply to many children of n.
  * Thus, for each non-host route, we attach its mask to a list at an internal
  * node as high in the tree as we can go.
  *
  * The present version of the code makes use of normal routes in short-
  * circuiting an explict mask and compare operation when testing whether
  * a key satisfies a normal route, and also in remembering the unique leaf
  * that governs a subtree.
  */
 
 /*
  * Most of the functions in this code assume that the key/mask arguments
  * are sockaddr-like structures, where the first byte is an u_char
  * indicating the size of the entire structure.
  *
  * To make the assumption more explicit, we use the LEN() macro to access
  * this field. It is safe to pass an expression with side effects
  * to LEN() as the argument is evaluated only once.
  * We cast the result to int as this is the dominant usage.
  */
 #define LEN(x) ( (int) (*(const u_char *)(x)) )
 
 /*
  * XXX THIS NEEDS TO BE FIXED
  * In the code, pointers to keys and masks are passed as either
  * 'void *' (because callers use to pass pointers of various kinds), or
  * 'caddr_t' (which is fine for pointer arithmetics, but not very
  * clean when you dereference it to access data). Furthermore, caddr_t
  * is really 'char *', while the natural type to operate on keys and
  * masks would be 'u_char'. This mismatch require a lot of casts and
  * intermediate variables to adapt types that clutter the code.
  */
 
 /*
  * Search a node in the tree matching the key.
  */
 static struct radix_node *
 rn_search(void *v_arg, struct radix_node *head)
 {
 	struct radix_node *x;
 	caddr_t v;
 
 	for (x = head, v = v_arg; x->rn_bit >= 0;) {
 		if (x->rn_bmask & v[x->rn_offset])
 			x = x->rn_right;
 		else
 			x = x->rn_left;
 	}
 	return (x);
 }
 
 /*
  * Same as above, but with an additional mask.
  * XXX note this function is used only once.
  */
 static struct radix_node *
 rn_search_m(void *v_arg, struct radix_node *head, void *m_arg)
 {
 	struct radix_node *x;
 	caddr_t v = v_arg, m = m_arg;
 
 	for (x = head; x->rn_bit >= 0;) {
 		if ((x->rn_bmask & m[x->rn_offset]) &&
 		    (x->rn_bmask & v[x->rn_offset]))
 			x = x->rn_right;
 		else
 			x = x->rn_left;
 	}
 	return (x);
 }
 
 int
 rn_refines(void *m_arg, void *n_arg)
 {
 	caddr_t m = m_arg, n = n_arg;
 	caddr_t lim, lim2 = lim = n + LEN(n);
 	int longer = LEN(n++) - LEN(m++);
 	int masks_are_equal = 1;
 
 	if (longer > 0)
 		lim -= longer;
 	while (n < lim) {
 		if (*n & ~(*m))
 			return (0);
 		if (*n++ != *m++)
 			masks_are_equal = 0;
 	}
 	while (n < lim2)
 		if (*n++)
 			return (0);
 	if (masks_are_equal && (longer < 0))
 		for (lim2 = m - longer; m < lim2; )
 			if (*m++)
 				return (1);
 	return (!masks_are_equal);
 }
 
 /*
  * Search for exact match in given @head.
  * Assume host bits are cleared in @v_arg if @m_arg is not NULL
  * Note that prefixes with /32 or /128 masks are treated differently
  * from host routes.
  */
 struct radix_node *
 rn_lookup(void *v_arg, void *m_arg, struct radix_node_head *head)
 {
 	struct radix_node *x;
 	caddr_t netmask;
 
 	if (m_arg != NULL) {
 		/*
 		 * Most common case: search exact prefix/mask
 		 */
 		x = rn_addmask(m_arg, head->rnh_masks, 1,
 		    head->rnh_treetop->rn_offset);
 		if (x == NULL)
 			return (NULL);
 		netmask = x->rn_key;
 
 		x = rn_match(v_arg, head);
 
 		while (x != NULL && x->rn_mask != netmask)
 			x = x->rn_dupedkey;
 
 		return (x);
 	}
 
 	/*
 	 * Search for host address.
 	 */
 	if ((x = rn_match(v_arg, head)) == NULL)
 		return (NULL);
 
 	/* Check if found key is the same */
 	if (LEN(x->rn_key) != LEN(v_arg) || bcmp(x->rn_key, v_arg, LEN(v_arg)))
 		return (NULL);
 
 	/* Check if this is not host route */
 	if (x->rn_mask != NULL)
 		return (NULL);
 
 	return (x);
 }
 
 static int
 rn_satisfies_leaf(char *trial, struct radix_node *leaf, int skip)
 {
 	char *cp = trial, *cp2 = leaf->rn_key, *cp3 = leaf->rn_mask;
 	char *cplim;
 	int length = min(LEN(cp), LEN(cp2));
 
 	if (cp3 == NULL)
 		cp3 = rn_ones;
 	else
 		length = min(length, LEN(cp3));
 	cplim = cp + length; cp3 += skip; cp2 += skip;
 	for (cp += skip; cp < cplim; cp++, cp2++, cp3++)
 		if ((*cp ^ *cp2) & *cp3)
 			return (0);
 	return (1);
 }
 
 /*
  * Search for longest-prefix match in given @head
  */
 struct radix_node *
 rn_match(void *v_arg, struct radix_node_head *head)
 {
 	caddr_t v = v_arg;
 	struct radix_node *t = head->rnh_treetop, *x;
 	caddr_t cp = v, cp2;
 	caddr_t cplim;
 	struct radix_node *saved_t, *top = t;
 	int off = t->rn_offset, vlen = LEN(cp), matched_off;
 	int test, b, rn_bit;
 
 	/*
 	 * Open code rn_search(v, top) to avoid overhead of extra
 	 * subroutine call.
 	 */
 	for (; t->rn_bit >= 0; ) {
 		if (t->rn_bmask & cp[t->rn_offset])
 			t = t->rn_right;
 		else
 			t = t->rn_left;
 	}
 	/*
 	 * See if we match exactly as a host destination
 	 * or at least learn how many bits match, for normal mask finesse.
 	 *
 	 * It doesn't hurt us to limit how many bytes to check
 	 * to the length of the mask, since if it matches we had a genuine
 	 * match and the leaf we have is the most specific one anyway;
 	 * if it didn't match with a shorter length it would fail
 	 * with a long one.  This wins big for class B&C netmasks which
 	 * are probably the most common case...
 	 */
 	if (t->rn_mask)
 		vlen = *(u_char *)t->rn_mask;
 	cp += off; cp2 = t->rn_key + off; cplim = v + vlen;
 	for (; cp < cplim; cp++, cp2++)
 		if (*cp != *cp2)
 			goto on1;
 	/*
 	 * This extra grot is in case we are explicitly asked
 	 * to look up the default.  Ugh!
 	 *
 	 * Never return the root node itself, it seems to cause a
 	 * lot of confusion.
 	 */
 	if (t->rn_flags & RNF_ROOT)
 		t = t->rn_dupedkey;
 	return (t);
 on1:
 	test = (*cp ^ *cp2) & 0xff; /* find first bit that differs */
 	for (b = 7; (test >>= 1) > 0;)
 		b--;
 	matched_off = cp - v;
 	b += matched_off << 3;
 	rn_bit = -1 - b;
 	/*
 	 * If there is a host route in a duped-key chain, it will be first.
 	 */
 	if ((saved_t = t)->rn_mask == 0)
 		t = t->rn_dupedkey;
 	for (; t; t = t->rn_dupedkey)
 		/*
 		 * Even if we don't match exactly as a host,
 		 * we may match if the leaf we wound up at is
 		 * a route to a net.
 		 */
 		if (t->rn_flags & RNF_NORMAL) {
 			if (rn_bit <= t->rn_bit)
 				return (t);
 		} else if (rn_satisfies_leaf(v, t, matched_off))
 				return (t);
 	t = saved_t;
 	/* start searching up the tree */
 	do {
 		struct radix_mask *m;
 		t = t->rn_parent;
 		m = t->rn_mklist;
 		/*
 		 * If non-contiguous masks ever become important
 		 * we can restore the masking and open coding of
 		 * the search and satisfaction test and put the
 		 * calculation of "off" back before the "do".
 		 */
 		while (m) {
 			if (m->rm_flags & RNF_NORMAL) {
 				if (rn_bit <= m->rm_bit)
 					return (m->rm_leaf);
 			} else {
 				off = min(t->rn_offset, matched_off);
 				x = rn_search_m(v, t, m->rm_mask);
 				while (x && x->rn_mask != m->rm_mask)
 					x = x->rn_dupedkey;
 				if (x && rn_satisfies_leaf(v, x, off))
 					return (x);
 			}
 			m = m->rm_mklist;
 		}
 	} while (t != top);
 	return (0);
 }
 
 #ifdef RN_DEBUG
 int	rn_nodenum;
 struct	radix_node *rn_clist;
 int	rn_saveinfo;
 int	rn_debug =  1;
 #endif
 
 /*
  * Whenever we add a new leaf to the tree, we also add a parent node,
  * so we allocate them as an array of two elements: the first one must be
  * the leaf (see RNTORT() in route.c), the second one is the parent.
  * This routine initializes the relevant fields of the nodes, so that
  * the leaf is the left child of the parent node, and both nodes have
  * (almost) all all fields filled as appropriate.
  * (XXX some fields are left unset, see the '#if 0' section).
  * The function returns a pointer to the parent node.
  */
 
 static struct radix_node *
 rn_newpair(void *v, int b, struct radix_node nodes[2])
 {
 	struct radix_node *tt = nodes, *t = tt + 1;
 	t->rn_bit = b;
 	t->rn_bmask = 0x80 >> (b & 7);
 	t->rn_left = tt;
 	t->rn_offset = b >> 3;
 
 #if 0  /* XXX perhaps we should fill these fields as well. */
 	t->rn_parent = t->rn_right = NULL;
 
 	tt->rn_mask = NULL;
 	tt->rn_dupedkey = NULL;
 	tt->rn_bmask = 0;
 #endif
 	tt->rn_bit = -1;
 	tt->rn_key = (caddr_t)v;
 	tt->rn_parent = t;
 	tt->rn_flags = t->rn_flags = RNF_ACTIVE;
 	tt->rn_mklist = t->rn_mklist = 0;
 #ifdef RN_DEBUG
 	tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++;
 	tt->rn_twin = t;
 	tt->rn_ybro = rn_clist;
 	rn_clist = tt;
 #endif
 	return (t);
 }
 
 static struct radix_node *
 rn_insert(void *v_arg, struct radix_node_head *head, int *dupentry,
     struct radix_node nodes[2])
 {
 	caddr_t v = v_arg;
 	struct radix_node *top = head->rnh_treetop;
 	int head_off = top->rn_offset, vlen = LEN(v);
 	struct radix_node *t = rn_search(v_arg, top);
 	caddr_t cp = v + head_off;
 	int b;
 	struct radix_node *p, *tt, *x;
     	/*
 	 * Find first bit at which v and t->rn_key differ
 	 */
 	caddr_t cp2 = t->rn_key + head_off;
 	int cmp_res;
 	caddr_t cplim = v + vlen;
 
 	while (cp < cplim)
 		if (*cp2++ != *cp++)
 			goto on1;
 	*dupentry = 1;
 	return (t);
 on1:
 	*dupentry = 0;
 	cmp_res = (cp[-1] ^ cp2[-1]) & 0xff;
 	for (b = (cp - v) << 3; cmp_res; b--)
 		cmp_res >>= 1;
 
 	x = top;
 	cp = v;
 	do {
 		p = x;
 		if (cp[x->rn_offset] & x->rn_bmask)
 			x = x->rn_right;
 		else
 			x = x->rn_left;
 	} while (b > (unsigned) x->rn_bit);
 				/* x->rn_bit < b && x->rn_bit >= 0 */
 #ifdef RN_DEBUG
 	if (rn_debug)
 		log(LOG_DEBUG, "rn_insert: Going In:\n"), traverse(p);
 #endif
 	t = rn_newpair(v_arg, b, nodes); 
 	tt = t->rn_left;
 	if ((cp[p->rn_offset] & p->rn_bmask) == 0)
 		p->rn_left = t;
 	else
 		p->rn_right = t;
 	x->rn_parent = t;
 	t->rn_parent = p; /* frees x, p as temp vars below */
 	if ((cp[t->rn_offset] & t->rn_bmask) == 0) {
 		t->rn_right = x;
 	} else {
 		t->rn_right = tt;
 		t->rn_left = x;
 	}
 #ifdef RN_DEBUG
 	if (rn_debug)
 		log(LOG_DEBUG, "rn_insert: Coming Out:\n"), traverse(p);
 #endif
 	return (tt);
 }
 
 struct radix_node *
 rn_addmask(void *n_arg, struct radix_node_head *maskhead, int search, int skip)
 {
 	unsigned char *netmask = n_arg;
 	unsigned char *cp, *cplim;
 	struct radix_node *x;
 	int b = 0, mlen, j;
 	int maskduplicated, isnormal;
 	struct radix_node *saved_x;
 	unsigned char addmask_key[RADIX_MAX_KEY_LEN];
 
 	if ((mlen = LEN(netmask)) > RADIX_MAX_KEY_LEN)
 		mlen = RADIX_MAX_KEY_LEN;
 	if (skip == 0)
 		skip = 1;
 	if (mlen <= skip)
 		return (maskhead->rnh_nodes);
 
 	bzero(addmask_key, RADIX_MAX_KEY_LEN);
 	if (skip > 1)
 		bcopy(rn_ones + 1, addmask_key + 1, skip - 1);
 	bcopy(netmask + skip, addmask_key + skip, mlen - skip);
 	/*
 	 * Trim trailing zeroes.
 	 */
 	for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-1] == 0;)
 		cp--;
 	mlen = cp - addmask_key;
 	if (mlen <= skip)
 		return (maskhead->rnh_nodes);
 	*addmask_key = mlen;
 	x = rn_search(addmask_key, maskhead->rnh_treetop);
 	if (bcmp(addmask_key, x->rn_key, mlen) != 0)
 		x = 0;
 	if (x || search)
 		return (x);
 	R_Zalloc(x, struct radix_node *, RADIX_MAX_KEY_LEN + 2 * sizeof (*x));
 	if ((saved_x = x) == 0)
 		return (0);
 	netmask = cp = (caddr_t)(x + 2);
 	bcopy(addmask_key, cp, mlen);
 	x = rn_insert(cp, maskhead, &maskduplicated, x);
 	if (maskduplicated) {
 		log(LOG_ERR, "rn_addmask: mask impossibly already in tree");
 		Free(saved_x);
 		return (x);
 	}
 	/*
 	 * Calculate index of mask, and check for normalcy.
 	 * First find the first byte with a 0 bit, then if there are
 	 * more bits left (remember we already trimmed the trailing 0's),
 	 * the bits should be contiguous, otherwise we have got
 	 * a non-contiguous mask.
 	 */
 #define	CONTIG(_c)	(((~(_c) + 1) & (_c)) == (unsigned char)(~(_c) + 1))
 	cplim = netmask + mlen;
 	isnormal = 1;
 	for (cp = netmask + skip; (cp < cplim) && *(u_char *)cp == 0xff;)
 		cp++;
 	if (cp != cplim) {
 		for (j = 0x80; (j & *cp) != 0; j >>= 1)
 			b++;
 		if (!CONTIG(*cp) || cp != (cplim - 1))
 			isnormal = 0;
 	}
 	b += (cp - netmask) << 3;
 	x->rn_bit = -1 - b;
 	if (isnormal)
 		x->rn_flags |= RNF_NORMAL;
 	return (x);
 }
 
 static int	/* XXX: arbitrary ordering for non-contiguous masks */
 rn_lexobetter(void *m_arg, void *n_arg)
 {
 	u_char *mp = m_arg, *np = n_arg, *lim;
 
 	if (LEN(mp) > LEN(np))
 		return (1);  /* not really, but need to check longer one first */
 	if (LEN(mp) == LEN(np))
 		for (lim = mp + LEN(mp); mp < lim;)
 			if (*mp++ > *np++)
 				return (1);
 	return (0);
 }
 
 static struct radix_mask *
 rn_new_radix_mask(struct radix_node *tt, struct radix_mask *next)
 {
 	struct radix_mask *m;
 
 	R_Malloc(m, struct radix_mask *, sizeof (struct radix_mask));
 	if (m == NULL) {
 		log(LOG_ERR, "Failed to allocate route mask\n");
 		return (0);
 	}
 	bzero(m, sizeof(*m));
 	m->rm_bit = tt->rn_bit;
 	m->rm_flags = tt->rn_flags;
 	if (tt->rn_flags & RNF_NORMAL)
 		m->rm_leaf = tt;
 	else
 		m->rm_mask = tt->rn_mask;
 	m->rm_mklist = next;
 	tt->rn_mklist = m;
 	return (m);
 }
 
 struct radix_node *
 rn_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
     struct radix_node treenodes[2])
 {
 	caddr_t v = (caddr_t)v_arg, netmask = (caddr_t)n_arg;
 	struct radix_node *t, *x = 0, *tt;
 	struct radix_node *saved_tt, *top = head->rnh_treetop;
 	short b = 0, b_leaf = 0;
 	int keyduplicated;
 	caddr_t mmask;
 	struct radix_mask *m, **mp;
 
 	/*
 	 * In dealing with non-contiguous masks, there may be
 	 * many different routes which have the same mask.
 	 * We will find it useful to have a unique pointer to
 	 * the mask to speed avoiding duplicate references at
 	 * nodes and possibly save time in calculating indices.
 	 */
 	if (netmask)  {
 		x = rn_addmask(netmask, head->rnh_masks, 0, top->rn_offset);
 		if (x == NULL)
 			return (0);
 		b_leaf = x->rn_bit;
 		b = -1 - x->rn_bit;
 		netmask = x->rn_key;
 	}
 	/*
 	 * Deal with duplicated keys: attach node to previous instance
 	 */
 	saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes);
 	if (keyduplicated) {
 		for (t = tt; tt; t = tt, tt = tt->rn_dupedkey) {
 #ifdef RADIX_MPATH
 			/* permit multipath, if enabled for the family */
 			if (rn_mpath_capable(head) && netmask == tt->rn_mask) {
 				/*
 				 * go down to the end of multipaths, so that
 				 * new entry goes into the end of rn_dupedkey
 				 * chain.
 				 */
 				do {
 					t = tt;
 					tt = tt->rn_dupedkey;
 				} while (tt && t->rn_mask == tt->rn_mask);
 				break;
 			}
 #endif
 			if (tt->rn_mask == netmask)
 				return (0);
 			if (netmask == 0 ||
 			    (tt->rn_mask &&
 			     ((b_leaf < tt->rn_bit) /* index(netmask) > node */
 			      || rn_refines(netmask, tt->rn_mask)
 			      || rn_lexobetter(netmask, tt->rn_mask))))
 				break;
 		}
 		/*
 		 * If the mask is not duplicated, we wouldn't
 		 * find it among possible duplicate key entries
 		 * anyway, so the above test doesn't hurt.
 		 *
 		 * We sort the masks for a duplicated key the same way as
 		 * in a masklist -- most specific to least specific.
 		 * This may require the unfortunate nuisance of relocating
 		 * the head of the list.
 		 *
 		 * We also reverse, or doubly link the list through the
 		 * parent pointer.
 		 */
 		if (tt == saved_tt) {
 			struct	radix_node *xx = x;
 			/* link in at head of list */
 			(tt = treenodes)->rn_dupedkey = t;
 			tt->rn_flags = t->rn_flags;
 			tt->rn_parent = x = t->rn_parent;
 			t->rn_parent = tt;	 		/* parent */
 			if (x->rn_left == t)
 				x->rn_left = tt;
 			else
 				x->rn_right = tt;
 			saved_tt = tt; x = xx;
 		} else {
 			(tt = treenodes)->rn_dupedkey = t->rn_dupedkey;
 			t->rn_dupedkey = tt;
 			tt->rn_parent = t;			/* parent */
 			if (tt->rn_dupedkey)			/* parent */
 				tt->rn_dupedkey->rn_parent = tt; /* parent */
 		}
 #ifdef RN_DEBUG
 		t=tt+1; tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++;
 		tt->rn_twin = t; tt->rn_ybro = rn_clist; rn_clist = tt;
 #endif
 		tt->rn_key = (caddr_t) v;
 		tt->rn_bit = -1;
 		tt->rn_flags = RNF_ACTIVE;
 	}
 	/*
 	 * Put mask in tree.
 	 */
 	if (netmask) {
 		tt->rn_mask = netmask;
 		tt->rn_bit = x->rn_bit;
 		tt->rn_flags |= x->rn_flags & RNF_NORMAL;
 	}
 	t = saved_tt->rn_parent;
 	if (keyduplicated)
 		goto on2;
 	b_leaf = -1 - t->rn_bit;
 	if (t->rn_right == saved_tt)
 		x = t->rn_left;
 	else
 		x = t->rn_right;
 	/* Promote general routes from below */
 	if (x->rn_bit < 0) {
 	    for (mp = &t->rn_mklist; x; x = x->rn_dupedkey)
 		if (x->rn_mask && (x->rn_bit >= b_leaf) && x->rn_mklist == 0) {
 			*mp = m = rn_new_radix_mask(x, 0);
 			if (m)
 				mp = &m->rm_mklist;
 		}
 	} else if (x->rn_mklist) {
 		/*
 		 * Skip over masks whose index is > that of new node
 		 */
 		for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist)
 			if (m->rm_bit >= b_leaf)
 				break;
 		t->rn_mklist = m; *mp = 0;
 	}
 on2:
 	/* Add new route to highest possible ancestor's list */
 	if ((netmask == 0) || (b > t->rn_bit ))
 		return (tt); /* can't lift at all */
 	b_leaf = tt->rn_bit;
 	do {
 		x = t;
 		t = t->rn_parent;
 	} while (b <= t->rn_bit && x != top);
 	/*
 	 * Search through routes associated with node to
 	 * insert new route according to index.
 	 * Need same criteria as when sorting dupedkeys to avoid
 	 * double loop on deletion.
 	 */
 	for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) {
 		if (m->rm_bit < b_leaf)
 			continue;
 		if (m->rm_bit > b_leaf)
 			break;
 		if (m->rm_flags & RNF_NORMAL) {
 			mmask = m->rm_leaf->rn_mask;
 			if (tt->rn_flags & RNF_NORMAL) {
 #if !defined(RADIX_MPATH)
 			    log(LOG_ERR,
 			        "Non-unique normal route, mask not entered\n");
 #endif
 				return (tt);
 			}
 		} else
 			mmask = m->rm_mask;
 		if (mmask == netmask) {
 			m->rm_refs++;
 			tt->rn_mklist = m;
 			return (tt);
 		}
 		if (rn_refines(netmask, mmask)
 		    || rn_lexobetter(netmask, mmask))
 			break;
 	}
 	*mp = rn_new_radix_mask(tt, *mp);
 	return (tt);
 }
 
 struct radix_node *
 rn_delete(void *v_arg, void *netmask_arg, struct radix_node_head *head)
 {
 	struct radix_node *t, *p, *x, *tt;
 	struct radix_mask *m, *saved_m, **mp;
 	struct radix_node *dupedkey, *saved_tt, *top;
 	caddr_t v, netmask;
 	int b, head_off, vlen;
 
 	v = v_arg;
 	netmask = netmask_arg;
 	x = head->rnh_treetop;
 	tt = rn_search(v, x);
 	head_off = x->rn_offset;
 	vlen =  LEN(v);
 	saved_tt = tt;
 	top = x;
 	if (tt == 0 ||
 	    bcmp(v + head_off, tt->rn_key + head_off, vlen - head_off))
 		return (0);
 	/*
 	 * Delete our route from mask lists.
 	 */
 	if (netmask) {
 		x = rn_addmask(netmask, head->rnh_masks, 1, head_off);
 		if (x == NULL)
 			return (0);
 		netmask = x->rn_key;
 		while (tt->rn_mask != netmask)
 			if ((tt = tt->rn_dupedkey) == 0)
 				return (0);
 	}
 	if (tt->rn_mask == 0 || (saved_m = m = tt->rn_mklist) == 0)
 		goto on1;
 	if (tt->rn_flags & RNF_NORMAL) {
 		if (m->rm_leaf != tt || m->rm_refs > 0) {
 			log(LOG_ERR, "rn_delete: inconsistent annotation\n");
 			return (0);  /* dangling ref could cause disaster */
 		}
 	} else {
 		if (m->rm_mask != tt->rn_mask) {
 			log(LOG_ERR, "rn_delete: inconsistent annotation\n");
 			goto on1;
 		}
 		if (--m->rm_refs >= 0)
 			goto on1;
 	}
 	b = -1 - tt->rn_bit;
 	t = saved_tt->rn_parent;
 	if (b > t->rn_bit)
 		goto on1; /* Wasn't lifted at all */
 	do {
 		x = t;
 		t = t->rn_parent;
 	} while (b <= t->rn_bit && x != top);
 	for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist)
 		if (m == saved_m) {
 			*mp = m->rm_mklist;
 			Free(m);
 			break;
 		}
 	if (m == 0) {
 		log(LOG_ERR, "rn_delete: couldn't find our annotation\n");
 		if (tt->rn_flags & RNF_NORMAL)
 			return (0); /* Dangling ref to us */
 	}
 on1:
 	/*
 	 * Eliminate us from tree
 	 */
 	if (tt->rn_flags & RNF_ROOT)
 		return (0);
 #ifdef RN_DEBUG
 	/* Get us out of the creation list */
 	for (t = rn_clist; t && t->rn_ybro != tt; t = t->rn_ybro) {}
 	if (t) t->rn_ybro = tt->rn_ybro;
 #endif
 	t = tt->rn_parent;
 	dupedkey = saved_tt->rn_dupedkey;
 	if (dupedkey) {
 		/*
 		 * Here, tt is the deletion target and
 		 * saved_tt is the head of the dupekey chain.
 		 */
 		if (tt == saved_tt) {
 			/* remove from head of chain */
 			x = dupedkey; x->rn_parent = t;
 			if (t->rn_left == tt)
 				t->rn_left = x;
 			else
 				t->rn_right = x;
 		} else {
 			/* find node in front of tt on the chain */
 			for (x = p = saved_tt; p && p->rn_dupedkey != tt;)
 				p = p->rn_dupedkey;
 			if (p) {
 				p->rn_dupedkey = tt->rn_dupedkey;
 				if (tt->rn_dupedkey)		/* parent */
 					tt->rn_dupedkey->rn_parent = p;
 								/* parent */
 			} else log(LOG_ERR, "rn_delete: couldn't find us\n");
 		}
 		t = tt + 1;
 		if  (t->rn_flags & RNF_ACTIVE) {
 #ifndef RN_DEBUG
 			*++x = *t;
 			p = t->rn_parent;
 #else
 			b = t->rn_info;
 			*++x = *t;
 			t->rn_info = b;
 			p = t->rn_parent;
 #endif
 			if (p->rn_left == t)
 				p->rn_left = x;
 			else
 				p->rn_right = x;
 			x->rn_left->rn_parent = x;
 			x->rn_right->rn_parent = x;
 		}
 		goto out;
 	}
 	if (t->rn_left == tt)
 		x = t->rn_right;
 	else
 		x = t->rn_left;
 	p = t->rn_parent;
 	if (p->rn_right == t)
 		p->rn_right = x;
 	else
 		p->rn_left = x;
 	x->rn_parent = p;
 	/*
 	 * Demote routes attached to us.
 	 */
 	if (t->rn_mklist) {
 		if (x->rn_bit >= 0) {
 			for (mp = &x->rn_mklist; (m = *mp);)
 				mp = &m->rm_mklist;
 			*mp = t->rn_mklist;
 		} else {
 			/* If there are any key,mask pairs in a sibling
 			   duped-key chain, some subset will appear sorted
 			   in the same order attached to our mklist */
 			for (m = t->rn_mklist; m && x; x = x->rn_dupedkey)
 				if (m == x->rn_mklist) {
 					struct radix_mask *mm = m->rm_mklist;
 					x->rn_mklist = 0;
 					if (--(m->rm_refs) < 0)
 						Free(m);
 					m = mm;
 				}
 			if (m)
 				log(LOG_ERR,
 				    "rn_delete: Orphaned Mask %p at %p\n",
 				    m, x);
 		}
 	}
 	/*
 	 * We may be holding an active internal node in the tree.
 	 */
 	x = tt + 1;
 	if (t != x) {
 #ifndef RN_DEBUG
 		*t = *x;
 #else
 		b = t->rn_info;
 		*t = *x;
 		t->rn_info = b;
 #endif
 		t->rn_left->rn_parent = t;
 		t->rn_right->rn_parent = t;
 		p = x->rn_parent;
 		if (p->rn_left == x)
 			p->rn_left = t;
 		else
 			p->rn_right = t;
 	}
 out:
 	tt->rn_flags &= ~RNF_ACTIVE;
 	tt[1].rn_flags &= ~RNF_ACTIVE;
 	return (tt);
 }
 
 /*
  * This is the same as rn_walktree() except for the parameters and the
  * exit.
  */
 static int
 rn_walktree_from(struct radix_node_head *h, void *a, void *m,
     walktree_f_t *f, void *w)
 {
 	int error;
 	struct radix_node *base, *next;
 	u_char *xa = (u_char *)a;
 	u_char *xm = (u_char *)m;
 	struct radix_node *rn, *last = NULL; /* shut up gcc */
 	int stopping = 0;
 	int lastb;
 
 	KASSERT(m != NULL, ("%s: mask needs to be specified", __func__));
 
 	/*
 	 * rn_search_m is sort-of-open-coded here. We cannot use the
 	 * function because we need to keep track of the last node seen.
 	 */
 	/* printf("about to search\n"); */
 	for (rn = h->rnh_treetop; rn->rn_bit >= 0; ) {
 		last = rn;
 		/* printf("rn_bit %d, rn_bmask %x, xm[rn_offset] %x\n",
 		       rn->rn_bit, rn->rn_bmask, xm[rn->rn_offset]); */
 		if (!(rn->rn_bmask & xm[rn->rn_offset])) {
 			break;
 		}
 		if (rn->rn_bmask & xa[rn->rn_offset]) {
 			rn = rn->rn_right;
 		} else {
 			rn = rn->rn_left;
 		}
 	}
 	/* printf("done searching\n"); */
 
 	/*
 	 * Two cases: either we stepped off the end of our mask,
 	 * in which case last == rn, or we reached a leaf, in which
 	 * case we want to start from the leaf.
 	 */
 	if (rn->rn_bit >= 0)
 		rn = last;
 	lastb = last->rn_bit;
 
 	/* printf("rn %p, lastb %d\n", rn, lastb);*/
 
 	/*
 	 * This gets complicated because we may delete the node
 	 * while applying the function f to it, so we need to calculate
 	 * the successor node in advance.
 	 */
 	while (rn->rn_bit >= 0)
 		rn = rn->rn_left;
 
 	while (!stopping) {
 		/* printf("node %p (%d)\n", rn, rn->rn_bit); */
 		base = rn;
 		/* If at right child go back up, otherwise, go right */
 		while (rn->rn_parent->rn_right == rn
 		       && !(rn->rn_flags & RNF_ROOT)) {
 			rn = rn->rn_parent;
 
 			/* if went up beyond last, stop */
 			if (rn->rn_bit <= lastb) {
 				stopping = 1;
 				/* printf("up too far\n"); */
 				/*
 				 * XXX we should jump to the 'Process leaves'
 				 * part, because the values of 'rn' and 'next'
 				 * we compute will not be used. Not a big deal
 				 * because this loop will terminate, but it is
 				 * inefficient and hard to understand!
 				 */
 			}
 		}
 		
 		/* 
 		 * At the top of the tree, no need to traverse the right
 		 * half, prevent the traversal of the entire tree in the
 		 * case of default route.
 		 */
 		if (rn->rn_parent->rn_flags & RNF_ROOT)
 			stopping = 1;
 
 		/* Find the next *leaf* since next node might vanish, too */
 		for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;)
 			rn = rn->rn_left;
 		next = rn;
 		/* Process leaves */
 		while ((rn = base) != 0) {
 			base = rn->rn_dupedkey;
 			/* printf("leaf %p\n", rn); */
 			if (!(rn->rn_flags & RNF_ROOT)
 			    && (error = (*f)(rn, w)))
 				return (error);
 		}
 		rn = next;
 
 		if (rn->rn_flags & RNF_ROOT) {
 			/* printf("root, stopping"); */
 			stopping = 1;
 		}
 
 	}
 	return (0);
 }
 
 static int
 rn_walktree(struct radix_node_head *h, walktree_f_t *f, void *w)
 {
 	int error;
 	struct radix_node *base, *next;
 	struct radix_node *rn = h->rnh_treetop;
 	/*
 	 * This gets complicated because we may delete the node
 	 * while applying the function f to it, so we need to calculate
 	 * the successor node in advance.
 	 */
 
 	/* First time through node, go left */
 	while (rn->rn_bit >= 0)
 		rn = rn->rn_left;
 	for (;;) {
 		base = rn;
 		/* If at right child go back up, otherwise, go right */
 		while (rn->rn_parent->rn_right == rn
 		       && (rn->rn_flags & RNF_ROOT) == 0)
 			rn = rn->rn_parent;
 		/* Find the next *leaf* since next node might vanish, too */
 		for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;)
 			rn = rn->rn_left;
 		next = rn;
 		/* Process leaves */
 		while ((rn = base)) {
 			base = rn->rn_dupedkey;
 			if (!(rn->rn_flags & RNF_ROOT)
 			    && (error = (*f)(rn, w)))
 				return (error);
 		}
 		rn = next;
 		if (rn->rn_flags & RNF_ROOT)
 			return (0);
 	}
 	/* NOTREACHED */
 }
 
 /*
  * Allocate and initialize an empty tree. This has 3 nodes, which are
  * part of the radix_node_head (in the order <left,root,right>) and are
  * marked RNF_ROOT so they cannot be freed.
  * The leaves have all-zero and all-one keys, with significant
  * bits starting at 'off'.
  * Return 1 on success, 0 on error.
  */
 static int
 rn_inithead_internal(void **head, int off)
 {
 	struct radix_node_head *rnh;
 	struct radix_node *t, *tt, *ttt;
 	if (*head)
 		return (1);
 	R_Zalloc(rnh, struct radix_node_head *, sizeof (*rnh));
 	if (rnh == 0)
 		return (0);
-#ifdef _KERNEL
-	RADIX_NODE_HEAD_LOCK_INIT(rnh);
-#endif
 	*head = rnh;
 	t = rn_newpair(rn_zeros, off, rnh->rnh_nodes);
 	ttt = rnh->rnh_nodes + 2;
 	t->rn_right = ttt;
 	t->rn_parent = t;
 	tt = t->rn_left;	/* ... which in turn is rnh->rnh_nodes */
 	tt->rn_flags = t->rn_flags = RNF_ROOT | RNF_ACTIVE;
 	tt->rn_bit = -1 - off;
 	*ttt = *tt;
 	ttt->rn_key = rn_ones;
 	rnh->rnh_addaddr = rn_addroute;
 	rnh->rnh_deladdr = rn_delete;
 	rnh->rnh_matchaddr = rn_match;
 	rnh->rnh_lookup = rn_lookup;
 	rnh->rnh_walktree = rn_walktree;
 	rnh->rnh_walktree_from = rn_walktree_from;
 	rnh->rnh_treetop = t;
 	return (1);
 }
 
 static void
 rn_detachhead_internal(void **head)
 {
 	struct radix_node_head *rnh;
 
 	KASSERT((head != NULL && *head != NULL),
 	    ("%s: head already freed", __func__));
 	rnh = *head;
 	
 	/* Free <left,root,right> nodes. */
 	Free(rnh);
 
 	*head = NULL;
 }
 
 int
 rn_inithead(void **head, int off)
 {
 	struct radix_node_head *rnh;
 
 	if (*head != NULL)
 		return (1);
 
 	if (rn_inithead_internal(head, off) == 0)
 		return (0);
 
 	rnh = (struct radix_node_head *)(*head);
 
 	if (rn_inithead_internal((void **)&rnh->rnh_masks, 0) == 0) {
 		rn_detachhead_internal(head);
 		return (0);
 	}
 
 	return (1);
 }
 
 int
 rn_detachhead(void **head)
 {
 	struct radix_node_head *rnh;
 
 	KASSERT((head != NULL && *head != NULL),
 	    ("%s: head already freed", __func__));
 
 	rnh = *head;
 
 	rn_detachhead_internal((void **)&rnh->rnh_masks);
 	rn_detachhead_internal(head);
 	return (1);
 }
 
Index: head/sys/netinet/in_rmx.c
===================================================================
--- head/sys/netinet/in_rmx.c	(revision 272360)
+++ head/sys/netinet/in_rmx.c	(revision 272361)
@@ -1,507 +1,509 @@
 /*-
  * Copyright 1994, 1995 Massachusetts Institute of Technology
  *
  * Permission to use, copy, modify, and distribute this software and
  * its documentation for any purpose and without fee is hereby
  * granted, provided that both the above copyright notice and this
  * permission notice appear in all copies, that both the above
  * copyright notice and this permission notice appear in all
  * supporting documentation, and that the name of M.I.T. not be used
  * in advertising or publicity pertaining to distribution of the
  * software without specific, written prior permission.  M.I.T. makes
  * no representations about the suitability of this software for any
  * purpose.  It is provided "as is" without express or implied
  * warranty.
  *
  * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
  * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
  * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <sys/socket.h>
 #include <sys/mbuf.h>
 #include <sys/syslog.h>
 #include <sys/callout.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/ip_var.h>
 
 extern int	in_inithead(void **head, int off);
 #ifdef VIMAGE
 extern int	in_detachhead(void **head, int off);
 #endif
 
 #define RTPRF_OURS		RTF_PROTO3	/* set on routes we manage */
 
 /*
  * Do what we need to do when inserting a route.
  */
 static struct radix_node *
 in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
     struct radix_node *treenodes)
 {
 	struct rtentry *rt = (struct rtentry *)treenodes;
 	struct sockaddr_in *sin = (struct sockaddr_in *)rt_key(rt);
 
 	RADIX_NODE_HEAD_WLOCK_ASSERT(head);
 	/*
 	 * A little bit of help for both IP output and input:
 	 *   For host routes, we make sure that RTF_BROADCAST
 	 *   is set for anything that looks like a broadcast address.
 	 *   This way, we can avoid an expensive call to in_broadcast()
 	 *   in ip_output() most of the time (because the route passed
 	 *   to ip_output() is almost always a host route).
 	 *
 	 *   We also do the same for local addresses, with the thought
 	 *   that this might one day be used to speed up ip_input().
 	 *
 	 * We also mark routes to multicast addresses as such, because
 	 * it's easy to do and might be useful (but this is much more
 	 * dubious since it's so easy to inspect the address).
 	 */
 	if (rt->rt_flags & RTF_HOST) {
 		if (in_broadcast(sin->sin_addr, rt->rt_ifp)) {
 			rt->rt_flags |= RTF_BROADCAST;
 		} else if (satosin(rt->rt_ifa->ifa_addr)->sin_addr.s_addr ==
 		    sin->sin_addr.s_addr) {
 			rt->rt_flags |= RTF_LOCAL;
 		}
 	}
 	if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
 		rt->rt_flags |= RTF_MULTICAST;
 
 	if (rt->rt_mtu == 0 && rt->rt_ifp != NULL)
 		rt->rt_mtu = rt->rt_ifp->if_mtu;
 
 	return (rn_addroute(v_arg, n_arg, head, treenodes));
 }
 
 /*
  * This code is the inverse of in_clsroute: on first reference, if we
  * were managing the route, stop doing so and set the expiration timer
  * back off again.
  */
 static struct radix_node *
 in_matroute(void *v_arg, struct radix_node_head *head)
 {
 	struct radix_node *rn = rn_match(v_arg, head);
 	struct rtentry *rt = (struct rtentry *)rn;
 
 	if (rt) {
 		RT_LOCK(rt);
 		if (rt->rt_flags & RTPRF_OURS) {
 			rt->rt_flags &= ~RTPRF_OURS;
 			rt->rt_expire = 0;
 		}
 		RT_UNLOCK(rt);
 	}
 	return rn;
 }
 
 static VNET_DEFINE(int, rtq_reallyold) = 60*60; /* one hour is "really old" */
 #define	V_rtq_reallyold		VNET(rtq_reallyold)
 SYSCTL_VNET_INT(_net_inet_ip, IPCTL_RTEXPIRE, rtexpire, CTLFLAG_RW,
     &VNET_NAME(rtq_reallyold), 0,
     "Default expiration time on dynamically learned routes");
 
 /* never automatically crank down to less */
 static VNET_DEFINE(int, rtq_minreallyold) = 10;
 #define	V_rtq_minreallyold	VNET(rtq_minreallyold)
 SYSCTL_VNET_INT(_net_inet_ip, IPCTL_RTMINEXPIRE, rtminexpire, CTLFLAG_RW,
     &VNET_NAME(rtq_minreallyold), 0,
     "Minimum time to attempt to hold onto dynamically learned routes");
 
 /* 128 cached routes is "too many" */
 static VNET_DEFINE(int, rtq_toomany) = 128;
 #define	V_rtq_toomany		VNET(rtq_toomany)
 SYSCTL_VNET_INT(_net_inet_ip, IPCTL_RTMAXCACHE, rtmaxcache, CTLFLAG_RW,
     &VNET_NAME(rtq_toomany), 0,
     "Upper limit on dynamically learned routes");
 
 /*
  * On last reference drop, mark the route as belong to us so that it can be
  * timed out.
  */
 static void
 in_clsroute(struct radix_node *rn, struct radix_node_head *head)
 {
 	struct rtentry *rt = (struct rtentry *)rn;
 
 	RT_LOCK_ASSERT(rt);
 
 	if (!(rt->rt_flags & RTF_UP))
 		return;			/* prophylactic measures */
 
 	if (rt->rt_flags & RTPRF_OURS)
 		return;
 
 	if (!(rt->rt_flags & RTF_DYNAMIC))
 		return;
 
 	/*
 	 * If rtq_reallyold is 0, just delete the route without
 	 * waiting for a timeout cycle to kill it.
 	 */
 	if (V_rtq_reallyold != 0) {
 		rt->rt_flags |= RTPRF_OURS;
 		rt->rt_expire = time_uptime + V_rtq_reallyold;
 	} else
 		rt_expunge(head, rt);
 }
 
 struct rtqk_arg {
 	struct radix_node_head *rnh;
 	int draining;
 	int killed;
 	int found;
 	int updating;
 	time_t nextstop;
 };
 
 /*
  * Get rid of old routes.  When draining, this deletes everything, even when
  * the timeout is not expired yet.  When updating, this makes sure that
  * nothing has a timeout longer than the current value of rtq_reallyold.
  */
 static int
 in_rtqkill(struct radix_node *rn, void *rock)
 {
 	struct rtqk_arg *ap = rock;
 	struct rtentry *rt = (struct rtentry *)rn;
 	int err;
 
 	RADIX_NODE_HEAD_WLOCK_ASSERT(ap->rnh);
 
 	if (rt->rt_flags & RTPRF_OURS) {
 		ap->found++;
 
 		if (ap->draining || rt->rt_expire <= time_uptime) {
 			if (rt->rt_refcnt > 0)
 				panic("rtqkill route really not free");
 
 			err = in_rtrequest(RTM_DELETE,
 					(struct sockaddr *)rt_key(rt),
 					rt->rt_gateway, rt_mask(rt),
 					rt->rt_flags | RTF_RNH_LOCKED, 0,
 					rt->rt_fibnum);
 			if (err) {
 				log(LOG_WARNING, "in_rtqkill: error %d\n", err);
 			} else {
 				ap->killed++;
 			}
 		} else {
 			if (ap->updating &&
 			    (rt->rt_expire - time_uptime > V_rtq_reallyold))
 				rt->rt_expire = time_uptime + V_rtq_reallyold;
 			ap->nextstop = lmin(ap->nextstop, rt->rt_expire);
 		}
 	}
 
 	return 0;
 }
 
 #define RTQ_TIMEOUT	60*10	/* run no less than once every ten minutes */
 static VNET_DEFINE(int, rtq_timeout) = RTQ_TIMEOUT;
 static VNET_DEFINE(struct callout, rtq_timer);
 
 #define	V_rtq_timeout		VNET(rtq_timeout)
 #define	V_rtq_timer		VNET(rtq_timer)
 
 static void in_rtqtimo_one(void *rock);
 
 static void
 in_rtqtimo(void *rock)
 {
 	CURVNET_SET((struct vnet *) rock);
 	int fibnum;
 	void *newrock;
 	struct timeval atv;
 
 	for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
 		newrock = rt_tables_get_rnh(fibnum, AF_INET);
 		if (newrock != NULL)
 			in_rtqtimo_one(newrock);
 	}
 	atv.tv_usec = 0;
 	atv.tv_sec = V_rtq_timeout;
 	callout_reset(&V_rtq_timer, tvtohz(&atv), in_rtqtimo, rock);
 	CURVNET_RESTORE();
 }
 
 static void
 in_rtqtimo_one(void *rock)
 {
 	struct radix_node_head *rnh = rock;
 	struct rtqk_arg arg;
 	static time_t last_adjusted_timeout = 0;
 
 	arg.found = arg.killed = 0;
 	arg.rnh = rnh;
 	arg.nextstop = time_uptime + V_rtq_timeout;
 	arg.draining = arg.updating = 0;
 	RADIX_NODE_HEAD_LOCK(rnh);
 	rnh->rnh_walktree(rnh, in_rtqkill, &arg);
 	RADIX_NODE_HEAD_UNLOCK(rnh);
 
 	/*
 	 * Attempt to be somewhat dynamic about this:
 	 * If there are ``too many'' routes sitting around taking up space,
 	 * then crank down the timeout, and see if we can't make some more
 	 * go away.  However, we make sure that we will never adjust more
 	 * than once in rtq_timeout seconds, to keep from cranking down too
 	 * hard.
 	 */
 	if ((arg.found - arg.killed > V_rtq_toomany) &&
 	    (time_uptime - last_adjusted_timeout >= V_rtq_timeout) &&
 	    V_rtq_reallyold > V_rtq_minreallyold) {
 		V_rtq_reallyold = 2 * V_rtq_reallyold / 3;
 		if (V_rtq_reallyold < V_rtq_minreallyold) {
 			V_rtq_reallyold = V_rtq_minreallyold;
 		}
 
 		last_adjusted_timeout = time_uptime;
 #ifdef DIAGNOSTIC
 		log(LOG_DEBUG, "in_rtqtimo: adjusted rtq_reallyold to %d\n",
 		    V_rtq_reallyold);
 #endif
 		arg.found = arg.killed = 0;
 		arg.updating = 1;
 		RADIX_NODE_HEAD_LOCK(rnh);
 		rnh->rnh_walktree(rnh, in_rtqkill, &arg);
 		RADIX_NODE_HEAD_UNLOCK(rnh);
 	}
 
 }
 
 void
 in_rtqdrain(void)
 {
 	VNET_ITERATOR_DECL(vnet_iter);
 	struct radix_node_head *rnh;
 	struct rtqk_arg arg;
 	int 	fibnum;
 
 	VNET_LIST_RLOCK_NOSLEEP();
 	VNET_FOREACH(vnet_iter) {
 		CURVNET_SET(vnet_iter);
 
 		for ( fibnum = 0; fibnum < rt_numfibs; fibnum++) {
 			rnh = rt_tables_get_rnh(fibnum, AF_INET);
 			arg.found = arg.killed = 0;
 			arg.rnh = rnh;
 			arg.nextstop = 0;
 			arg.draining = 1;
 			arg.updating = 0;
 			RADIX_NODE_HEAD_LOCK(rnh);
 			rnh->rnh_walktree(rnh, in_rtqkill, &arg);
 			RADIX_NODE_HEAD_UNLOCK(rnh);
 		}
 		CURVNET_RESTORE();
 	}
 	VNET_LIST_RUNLOCK_NOSLEEP();
 }
 
 void
 in_setmatchfunc(struct radix_node_head *rnh, int val)
 {
 
 	rnh->rnh_matchaddr = (val != 0) ? rn_match : in_matroute;
 }
 
 static int _in_rt_was_here;
 /*
  * Initialize our routing tree.
  */
 int
 in_inithead(void **head, int off)
 {
 	struct radix_node_head *rnh;
 
 	/* XXX MRT
 	 * This can be called from vfs_export.c too in which case 'off'
 	 * will be 0. We know the correct value so just use that and
 	 * return directly if it was 0.
 	 * This is a hack that replaces an even worse hack on a bad hack
 	 * on a bad design. After RELENG_7 this should be fixed but that
 	 * will change the ABI, so for now do it this way.
 	 */
 	if (!rn_inithead(head, 32))
 		return 0;
 
+	rnh = *head;
+	RADIX_NODE_HEAD_LOCK_INIT(rnh);
+
 	if (off == 0)		/* XXX MRT  see above */
 		return 1;	/* only do the rest for a real routing table */
 
-	rnh = *head;
 	rnh->rnh_addaddr = in_addroute;
 	in_setmatchfunc(rnh, V_drop_redirect);
 	rnh->rnh_close = in_clsroute;
 	if (_in_rt_was_here == 0 ) {
 		callout_init(&V_rtq_timer, CALLOUT_MPSAFE);
 		callout_reset(&V_rtq_timer, 1, in_rtqtimo, curvnet);
 		_in_rt_was_here = 1;
 	}
 	return 1;
 }
 
 #ifdef VIMAGE
 int
 in_detachhead(void **head, int off)
 {
 
 	callout_drain(&V_rtq_timer);
 	return (1);
 }
 #endif
 
 /*
  * This zaps old routes when the interface goes down or interface
  * address is deleted.  In the latter case, it deletes static routes
  * that point to this address.  If we don't do this, we may end up
  * using the old address in the future.  The ones we always want to
  * get rid of are things like ARP entries, since the user might down
  * the interface, walk over to a completely different network, and
  * plug back in.
  */
 struct in_ifadown_arg {
 	struct radix_node_head *rnh;
 	struct ifaddr *ifa;
 	int del;
 };
 
 static int
 in_ifadownkill(struct radix_node *rn, void *xap)
 {
 	struct in_ifadown_arg *ap = xap;
 	struct rtentry *rt = (struct rtentry *)rn;
 
 	RT_LOCK(rt);
 	if (rt->rt_ifa == ap->ifa &&
 	    (ap->del || !(rt->rt_flags & RTF_STATIC))) {
 		/*
 		 * Aquire a reference so that it can later be freed
 		 * as the refcount would be 0 here in case of at least
 		 * ap->del.
 		 */
 		RT_ADDREF(rt);
 		/*
 		 * Disconnect it from the tree and permit protocols
 		 * to cleanup.
 		 */
 		rt_expunge(ap->rnh, rt);
 		/*
 		 * At this point it is an rttrash node, and in case
 		 * the above is the only reference we must free it.
 		 * If we do not noone will have a pointer and the
 		 * rtentry will be leaked forever.
 		 * In case someone else holds a reference, we are
 		 * fine as we only decrement the refcount. In that
 		 * case if the other entity calls RT_REMREF, we
 		 * will still be leaking but at least we tried.
 		 */
 		RTFREE_LOCKED(rt);
 		return (0);
 	}
 	RT_UNLOCK(rt);
 	return 0;
 }
 
 void
 in_ifadown(struct ifaddr *ifa, int delete)
 {
 	struct in_ifadown_arg arg;
 	struct radix_node_head *rnh;
 	int	fibnum;
 
 	KASSERT(ifa->ifa_addr->sa_family == AF_INET,
 	    ("%s: wrong family", __func__));
 
 	for ( fibnum = 0; fibnum < rt_numfibs; fibnum++) {
 		rnh = rt_tables_get_rnh(fibnum, AF_INET);
 		arg.rnh = rnh;
 		arg.ifa = ifa;
 		arg.del = delete;
 		RADIX_NODE_HEAD_LOCK(rnh);
 		rnh->rnh_walktree(rnh, in_ifadownkill, &arg);
 		RADIX_NODE_HEAD_UNLOCK(rnh);
 		ifa->ifa_flags &= ~IFA_ROUTE;		/* XXXlocking? */
 	}
 }
 
 /*
  * inet versions of rt functions. These have fib extensions and 
  * for now will just reference the _fib variants.
  * eventually this order will be reversed,
  */
 void
 in_rtalloc_ign(struct route *ro, u_long ignflags, u_int fibnum)
 {
 	rtalloc_ign_fib(ro, ignflags, fibnum);
 }
 
 int
 in_rtrequest( int req,
 	struct sockaddr *dst,
 	struct sockaddr *gateway,
 	struct sockaddr *netmask,
 	int flags,
 	struct rtentry **ret_nrt,
 	u_int fibnum)
 {
 	return (rtrequest_fib(req, dst, gateway, netmask, 
 	    flags, ret_nrt, fibnum));
 }
 
 struct rtentry *
 in_rtalloc1(struct sockaddr *dst, int report, u_long ignflags, u_int fibnum)
 {
 	return (rtalloc1_fib(dst, report, ignflags, fibnum));
 }
 
 void
 in_rtredirect(struct sockaddr *dst,
 	struct sockaddr *gateway,
 	struct sockaddr *netmask,
 	int flags,
 	struct sockaddr *src,
 	u_int fibnum)
 {
 	rtredirect_fib(dst, gateway, netmask, flags, src, fibnum);
 }
  
 void
 in_rtalloc(struct route *ro, u_int fibnum)
 {
 	rtalloc_ign_fib(ro, 0UL, fibnum);
 }
 
 #if 0
 int	 in_rt_getifa(struct rt_addrinfo *, u_int fibnum);
 int	 in_rtioctl(u_long, caddr_t, u_int);
 int	 in_rtrequest1(int, struct rt_addrinfo *, struct rtentry **, u_int);
 #endif
 
 
Index: head/sys/netinet6/in6_rmx.c
===================================================================
--- head/sys/netinet6/in6_rmx.c	(revision 272360)
+++ head/sys/netinet6/in6_rmx.c	(revision 272361)
@@ -1,336 +1,338 @@
 /*-
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$KAME: in6_rmx.c,v 1.11 2001/07/26 06:53:16 jinmei Exp $
  */
 
 /*-
  * Copyright 1994, 1995 Massachusetts Institute of Technology
  *
  * Permission to use, copy, modify, and distribute this software and
  * its documentation for any purpose and without fee is hereby
  * granted, provided that both the above copyright notice and this
  * permission notice appear in all copies, that both the above
  * copyright notice and this permission notice appear in all
  * supporting documentation, and that the name of M.I.T. not be used
  * in advertising or publicity pertaining to distribution of the
  * software without specific, written prior permission.  M.I.T. makes
  * no representations about the suitability of this software for any
  * purpose.  It is provided "as is" without express or implied
  * warranty.
  *
  * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
  * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
  * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/sysctl.h>
 #include <sys/queue.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/mbuf.h>
 #include <sys/rwlock.h>
 #include <sys/syslog.h>
 #include <sys/callout.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/ip_var.h>
 #include <netinet/in_var.h>
 
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 
 #include <netinet/icmp6.h>
 #include <netinet6/nd6.h>
 
 #include <netinet/tcp.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 
 extern int	in6_inithead(void **head, int off);
 #ifdef VIMAGE
 extern int	in6_detachhead(void **head, int off);
 #endif
 
 /*
  * Do what we need to do when inserting a route.
  */
 static struct radix_node *
 in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
     struct radix_node *treenodes)
 {
 	struct rtentry *rt = (struct rtentry *)treenodes;
 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)rt_key(rt);
 	struct radix_node *ret;
 
 	RADIX_NODE_HEAD_WLOCK_ASSERT(head);
 	if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
 		rt->rt_flags |= RTF_MULTICAST;
 
 	/*
 	 * A little bit of help for both IPv6 output and input:
 	 *   For local addresses, we make sure that RTF_LOCAL is set,
 	 *   with the thought that this might one day be used to speed up
 	 *   ip_input().
 	 *
 	 * We also mark routes to multicast addresses as such, because
 	 * it's easy to do and might be useful (but this is much more
 	 * dubious since it's so easy to inspect the address).  (This
 	 * is done above.)
 	 *
 	 * XXX
 	 * should elaborate the code.
 	 */
 	if (rt->rt_flags & RTF_HOST) {
 		if (IN6_ARE_ADDR_EQUAL(&satosin6(rt->rt_ifa->ifa_addr)
 					->sin6_addr,
 				       &sin6->sin6_addr)) {
 			rt->rt_flags |= RTF_LOCAL;
 		}
 	}
 
 	if (!rt->rt_mtu && rt->rt_ifp)
 		rt->rt_mtu = IN6_LINKMTU(rt->rt_ifp);
 
 	ret = rn_addroute(v_arg, n_arg, head, treenodes);
 	if (ret == NULL) {
 		struct rtentry *rt2;
 		/*
 		 * We are trying to add a net route, but can't.
 		 * The following case should be allowed, so we'll make a
 		 * special check for this:
 		 *	Two IPv6 addresses with the same prefix is assigned
 		 *	to a single interrface.
 		 *	# ifconfig if0 inet6 3ffe:0501::1 prefix 64 alias (*1)
 		 *	# ifconfig if0 inet6 3ffe:0501::2 prefix 64 alias (*2)
 		 *	In this case, (*1) and (*2) want to add the same
 		 *	net route entry, 3ffe:0501:: -> if0.
 		 *	This case should not raise an error.
 		 */
 		rt2 = in6_rtalloc1((struct sockaddr *)sin6, 0, RTF_RNH_LOCKED,
 		    rt->rt_fibnum);
 		if (rt2) {
 			if (((rt2->rt_flags & (RTF_HOST|RTF_GATEWAY)) == 0)
 			 && rt2->rt_gateway
 			 && rt2->rt_gateway->sa_family == AF_LINK
 			 && rt2->rt_ifp == rt->rt_ifp) {
 				ret = rt2->rt_nodes;
 			}
 			RTFREE_LOCKED(rt2);
 		}
 	}
 	return (ret);
 }
 
 SYSCTL_DECL(_net_inet6_ip6);
 
 static VNET_DEFINE(int, rtq_toomany6) = 128;
 	/* 128 cached routes is ``too many'' */
 #define	V_rtq_toomany6			VNET(rtq_toomany6)
 SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_RTMAXCACHE, rtmaxcache, CTLFLAG_RW,
     &VNET_NAME(rtq_toomany6) , 0, "");
 
 struct rtqk_arg {
 	struct radix_node_head *rnh;
 	int mode;
 	int updating;
 	int draining;
 	int killed;
 	int found;
 	time_t nextstop;
 };
 
 /*
  * Age old PMTUs.
  */
 struct mtuex_arg {
 	struct radix_node_head *rnh;
 	time_t nextstop;
 };
 static VNET_DEFINE(struct callout, rtq_mtutimer);
 #define	V_rtq_mtutimer			VNET(rtq_mtutimer)
 
 static int
 in6_mtuexpire(struct radix_node *rn, void *rock)
 {
 	struct rtentry *rt = (struct rtentry *)rn;
 	struct mtuex_arg *ap = rock;
 
 	/* sanity */
 	if (!rt)
 		panic("rt == NULL in in6_mtuexpire");
 
 	if (rt->rt_expire && !(rt->rt_flags & RTF_PROBEMTU)) {
 		if (rt->rt_expire <= time_uptime) {
 			rt->rt_flags |= RTF_PROBEMTU;
 		} else {
 			ap->nextstop = lmin(ap->nextstop, rt->rt_expire);
 		}
 	}
 
 	return 0;
 }
 
 #define	MTUTIMO_DEFAULT	(60*1)
 
 static void
 in6_mtutimo_one(struct radix_node_head *rnh)
 {
 	struct mtuex_arg arg;
 
 	arg.rnh = rnh;
 	arg.nextstop = time_uptime + MTUTIMO_DEFAULT;
 	RADIX_NODE_HEAD_LOCK(rnh);
 	rnh->rnh_walktree(rnh, in6_mtuexpire, &arg);
 	RADIX_NODE_HEAD_UNLOCK(rnh);
 }
 
 static void
 in6_mtutimo(void *rock)
 {
 	CURVNET_SET_QUIET((struct vnet *) rock);
 	struct radix_node_head *rnh;
 	struct timeval atv;
 	u_int fibnum;
 
 	for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
 		rnh = rt_tables_get_rnh(fibnum, AF_INET6);
 		if (rnh != NULL)
 			in6_mtutimo_one(rnh);
 	}
 
 	atv.tv_sec = MTUTIMO_DEFAULT;
 	atv.tv_usec = 0;
 	callout_reset(&V_rtq_mtutimer, tvtohz(&atv), in6_mtutimo, rock);
 	CURVNET_RESTORE();
 }
 
 /*
  * Initialize our routing tree.
  * XXX MRT When off == 0, we are being called from vfs_export.c
  * so just set up their table and leave. (we know what the correct
  * value should be so just use that).. FIX AFTER RELENG_7 is MFC'd
  * see also comments in in_inithead() vfs_export.c and domain.h
  */
 static VNET_DEFINE(int, _in6_rt_was_here);
 #define	V__in6_rt_was_here	VNET(_in6_rt_was_here)
 
 int
 in6_inithead(void **head, int off)
 {
 	struct radix_node_head *rnh;
 
 	if (!rn_inithead(head, offsetof(struct sockaddr_in6, sin6_addr) << 3))
 		return 0;		/* See above */
 
+	rnh = *head;
+	RADIX_NODE_HEAD_LOCK_INIT(rnh);
+
 	if (off == 0)		/* See above */
 		return 1;	/* only do the rest for the real thing */
 
-	rnh = *head;
 	rnh->rnh_addaddr = in6_addroute;
 
 	if (V__in6_rt_was_here == 0) {
 		callout_init(&V_rtq_mtutimer, CALLOUT_MPSAFE);
 		in6_mtutimo(curvnet);	/* kick off timeout first time */
 		V__in6_rt_was_here = 1;
 	}
 
 	return 1;
 }
 
 #ifdef VIMAGE
 int
 in6_detachhead(void **head, int off)
 {
 
 	callout_drain(&V_rtq_mtutimer);
 	return (1);
 }
 #endif
 
 /*
  * Extended API for IPv6 FIB support.
  */
 void
 in6_rtredirect(struct sockaddr *dst, struct sockaddr *gw, struct sockaddr *nm,
     int flags, struct sockaddr *src, u_int fibnum)
 {
 
 	rtredirect_fib(dst, gw, nm, flags, src, fibnum);
 }
 
 int
 in6_rtrequest(int req, struct sockaddr *dst, struct sockaddr *gw,
     struct sockaddr *mask, int flags, struct rtentry **ret_nrt, u_int fibnum)
 {
 
 	return (rtrequest_fib(req, dst, gw, mask, flags, ret_nrt, fibnum));
 }
 
 void
 in6_rtalloc(struct route_in6 *ro, u_int fibnum)
 {
 
 	rtalloc_ign_fib((struct route *)ro, 0ul, fibnum);
 }
 
 void
 in6_rtalloc_ign(struct route_in6 *ro, u_long ignflags, u_int fibnum)
 {
 
 	rtalloc_ign_fib((struct route *)ro, ignflags, fibnum);
 }
 
 struct rtentry *
 in6_rtalloc1(struct sockaddr *dst, int report, u_long ignflags, u_int fibnum)
 {
 
 	return (rtalloc1_fib(dst, report, ignflags, fibnum));
 }
Index: head/sys/netpfil/pf/pf_table.c
===================================================================
--- head/sys/netpfil/pf/pf_table.c	(revision 272360)
+++ head/sys/netpfil/pf/pf_table.c	(revision 272361)
@@ -1,2197 +1,2193 @@
 /*-
  * Copyright (c) 2002 Cedric Berger
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  *    - Redistributions of source code must retain the above copyright
  *      notice, this list of conditions and the following disclaimer.
  *    - Redistributions in binary form must reproduce the above
  *      copyright notice, this list of conditions and the following
  *      disclaimer in the documentation and/or other materials provided
  *      with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  *
  *	$OpenBSD: pf_table.c,v 1.79 2008/10/08 06:24:50 mcbride Exp $
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/refcount.h>
 #include <sys/rwlock.h>
 #include <sys/socket.h>
 #include <vm/uma.h>
 
 #include <net/if.h>
 #include <net/vnet.h>
 #include <net/pfvar.h>
 
 #define	ACCEPT_FLAGS(flags, oklist)		\
 	do {					\
 		if ((flags & ~(oklist)) &	\
 		    PFR_FLAG_ALLMASK)		\
 			return (EINVAL);	\
 	} while (0)
 
 #define	FILLIN_SIN(sin, addr)			\
 	do {					\
 		(sin).sin_len = sizeof(sin);	\
 		(sin).sin_family = AF_INET;	\
 		(sin).sin_addr = (addr);	\
 	} while (0)
 
 #define	FILLIN_SIN6(sin6, addr)			\
 	do {					\
 		(sin6).sin6_len = sizeof(sin6);	\
 		(sin6).sin6_family = AF_INET6;	\
 		(sin6).sin6_addr = (addr);	\
 	} while (0)
 
 #define	SWAP(type, a1, a2)			\
 	do {					\
 		type tmp = a1;			\
 		a1 = a2;			\
 		a2 = tmp;			\
 	} while (0)
 
 #define	SUNION2PF(su, af) (((af)==AF_INET) ?	\
     (struct pf_addr *)&(su)->sin.sin_addr :	\
     (struct pf_addr *)&(su)->sin6.sin6_addr)
 
 #define	AF_BITS(af)		(((af)==AF_INET)?32:128)
 #define	ADDR_NETWORK(ad)	((ad)->pfra_net < AF_BITS((ad)->pfra_af))
 #define	KENTRY_NETWORK(ke)	((ke)->pfrke_net < AF_BITS((ke)->pfrke_af))
 #define	KENTRY_RNF_ROOT(ke) \
 		((((struct radix_node *)(ke))->rn_flags & RNF_ROOT) != 0)
 
 #define	NO_ADDRESSES		(-1)
 #define	ENQUEUE_UNMARKED_ONLY	(1)
 #define	INVERT_NEG_FLAG		(1)
 
 struct pfr_walktree {
 	enum pfrw_op {
 		PFRW_MARK,
 		PFRW_SWEEP,
 		PFRW_ENQUEUE,
 		PFRW_GET_ADDRS,
 		PFRW_GET_ASTATS,
 		PFRW_POOL_GET,
 		PFRW_DYNADDR_UPDATE
 	}	 pfrw_op;
 	union {
 		struct pfr_addr		*pfrw1_addr;
 		struct pfr_astats	*pfrw1_astats;
 		struct pfr_kentryworkq	*pfrw1_workq;
 		struct pfr_kentry	*pfrw1_kentry;
 		struct pfi_dynaddr	*pfrw1_dyn;
 	}	 pfrw_1;
 	int	 pfrw_free;
 };
 #define	pfrw_addr	pfrw_1.pfrw1_addr
 #define	pfrw_astats	pfrw_1.pfrw1_astats
 #define	pfrw_workq	pfrw_1.pfrw1_workq
 #define	pfrw_kentry	pfrw_1.pfrw1_kentry
 #define	pfrw_dyn	pfrw_1.pfrw1_dyn
 #define	pfrw_cnt	pfrw_free
 
 #define	senderr(e)	do { rv = (e); goto _bad; } while (0)
 
 static MALLOC_DEFINE(M_PFTABLE, "pf_table", "pf(4) tables structures");
 static VNET_DEFINE(uma_zone_t, pfr_kentry_z);
 #define	V_pfr_kentry_z		VNET(pfr_kentry_z)
 static VNET_DEFINE(uma_zone_t, pfr_kcounters_z);
 #define	V_pfr_kcounters_z	VNET(pfr_kcounters_z)
 
 static struct pf_addr	 pfr_ffaddr = {
 	.addr32 = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }
 };
 
 static void		 pfr_copyout_addr(struct pfr_addr *,
 			    struct pfr_kentry *ke);
 static int		 pfr_validate_addr(struct pfr_addr *);
 static void		 pfr_enqueue_addrs(struct pfr_ktable *,
 			    struct pfr_kentryworkq *, int *, int);
 static void		 pfr_mark_addrs(struct pfr_ktable *);
 static struct pfr_kentry
 			*pfr_lookup_addr(struct pfr_ktable *,
 			    struct pfr_addr *, int);
 static struct pfr_kentry *pfr_create_kentry(struct pfr_addr *);
 static void		 pfr_destroy_kentries(struct pfr_kentryworkq *);
 static void		 pfr_destroy_kentry(struct pfr_kentry *);
 static void		 pfr_insert_kentries(struct pfr_ktable *,
 			    struct pfr_kentryworkq *, long);
 static void		 pfr_remove_kentries(struct pfr_ktable *,
 			    struct pfr_kentryworkq *);
 static void		 pfr_clstats_kentries(struct pfr_kentryworkq *, long,
 			    int);
 static void		 pfr_reset_feedback(struct pfr_addr *, int);
 static void		 pfr_prepare_network(union sockaddr_union *, int, int);
 static int		 pfr_route_kentry(struct pfr_ktable *,
 			    struct pfr_kentry *);
 static int		 pfr_unroute_kentry(struct pfr_ktable *,
 			    struct pfr_kentry *);
 static int		 pfr_walktree(struct radix_node *, void *);
 static int		 pfr_validate_table(struct pfr_table *, int, int);
 static int		 pfr_fix_anchor(char *);
 static void		 pfr_commit_ktable(struct pfr_ktable *, long);
 static void		 pfr_insert_ktables(struct pfr_ktableworkq *);
 static void		 pfr_insert_ktable(struct pfr_ktable *);
 static void		 pfr_setflags_ktables(struct pfr_ktableworkq *);
 static void		 pfr_setflags_ktable(struct pfr_ktable *, int);
 static void		 pfr_clstats_ktables(struct pfr_ktableworkq *, long,
 			    int);
 static void		 pfr_clstats_ktable(struct pfr_ktable *, long, int);
 static struct pfr_ktable
 			*pfr_create_ktable(struct pfr_table *, long, int);
 static void		 pfr_destroy_ktables(struct pfr_ktableworkq *, int);
 static void		 pfr_destroy_ktable(struct pfr_ktable *, int);
 static int		 pfr_ktable_compare(struct pfr_ktable *,
 			    struct pfr_ktable *);
 static struct pfr_ktable
 			*pfr_lookup_table(struct pfr_table *);
 static void		 pfr_clean_node_mask(struct pfr_ktable *,
 			    struct pfr_kentryworkq *);
 static int		 pfr_table_count(struct pfr_table *, int);
 static int		 pfr_skip_table(struct pfr_table *,
 			    struct pfr_ktable *, int);
 static struct pfr_kentry
 			*pfr_kentry_byidx(struct pfr_ktable *, int, int);
 
 static RB_PROTOTYPE(pfr_ktablehead, pfr_ktable, pfrkt_tree, pfr_ktable_compare);
 static RB_GENERATE(pfr_ktablehead, pfr_ktable, pfrkt_tree, pfr_ktable_compare);
 
 struct pfr_ktablehead	 pfr_ktables;
 struct pfr_table	 pfr_nulltable;
 int			 pfr_ktable_cnt;
 
 void
 pfr_initialize(void)
 {
 
 	V_pfr_kentry_z = uma_zcreate("pf table entries",
 	    sizeof(struct pfr_kentry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
 	    0);
 	V_pfr_kcounters_z = uma_zcreate("pf table counters",
 	    sizeof(struct pfr_kcounters), NULL, NULL, NULL, NULL,
 	    UMA_ALIGN_PTR, 0);
 	V_pf_limits[PF_LIMIT_TABLE_ENTRIES].zone = V_pfr_kentry_z;
 	V_pf_limits[PF_LIMIT_TABLE_ENTRIES].limit = PFR_KENTRY_HIWAT;
 }
 
 void
 pfr_cleanup(void)
 {
 
 	uma_zdestroy(V_pfr_kentry_z);
 	uma_zdestroy(V_pfr_kcounters_z);
 }
 
 int
 pfr_clr_addrs(struct pfr_table *tbl, int *ndel, int flags)
 {
 	struct pfr_ktable	*kt;
 	struct pfr_kentryworkq	 workq;
 
 	PF_RULES_WASSERT();
 
 	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY);
 	if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL))
 		return (EINVAL);
 	kt = pfr_lookup_table(tbl);
 	if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
 		return (ESRCH);
 	if (kt->pfrkt_flags & PFR_TFLAG_CONST)
 		return (EPERM);
 	pfr_enqueue_addrs(kt, &workq, ndel, 0);
 
 	if (!(flags & PFR_FLAG_DUMMY)) {
 		pfr_remove_kentries(kt, &workq);
 		KASSERT(kt->pfrkt_cnt == 0, ("%s: non-null pfrkt_cnt", __func__));
 	}
 	return (0);
 }
 
 int
 pfr_add_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
     int *nadd, int flags)
 {
 	struct pfr_ktable	*kt, *tmpkt;
 	struct pfr_kentryworkq	 workq;
 	struct pfr_kentry	*p, *q;
 	struct pfr_addr		*ad;
 	int			 i, rv, xadd = 0;
 	long			 tzero = time_second;
 
 	PF_RULES_WASSERT();
 
 	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_FEEDBACK);
 	if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL))
 		return (EINVAL);
 	kt = pfr_lookup_table(tbl);
 	if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
 		return (ESRCH);
 	if (kt->pfrkt_flags & PFR_TFLAG_CONST)
 		return (EPERM);
 	tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0);
 	if (tmpkt == NULL)
 		return (ENOMEM);
 	SLIST_INIT(&workq);
 	for (i = 0, ad = addr; i < size; i++, ad++) {
 		if (pfr_validate_addr(ad))
 			senderr(EINVAL);
 		p = pfr_lookup_addr(kt, ad, 1);
 		q = pfr_lookup_addr(tmpkt, ad, 1);
 		if (flags & PFR_FLAG_FEEDBACK) {
 			if (q != NULL)
 				ad->pfra_fback = PFR_FB_DUPLICATE;
 			else if (p == NULL)
 				ad->pfra_fback = PFR_FB_ADDED;
 			else if (p->pfrke_not != ad->pfra_not)
 				ad->pfra_fback = PFR_FB_CONFLICT;
 			else
 				ad->pfra_fback = PFR_FB_NONE;
 		}
 		if (p == NULL && q == NULL) {
 			p = pfr_create_kentry(ad);
 			if (p == NULL)
 				senderr(ENOMEM);
 			if (pfr_route_kentry(tmpkt, p)) {
 				pfr_destroy_kentry(p);
 				ad->pfra_fback = PFR_FB_NONE;
 			} else {
 				SLIST_INSERT_HEAD(&workq, p, pfrke_workq);
 				xadd++;
 			}
 		}
 	}
 	pfr_clean_node_mask(tmpkt, &workq);
 	if (!(flags & PFR_FLAG_DUMMY))
 		pfr_insert_kentries(kt, &workq, tzero);
 	else
 		pfr_destroy_kentries(&workq);
 	if (nadd != NULL)
 		*nadd = xadd;
 	pfr_destroy_ktable(tmpkt, 0);
 	return (0);
 _bad:
 	pfr_clean_node_mask(tmpkt, &workq);
 	pfr_destroy_kentries(&workq);
 	if (flags & PFR_FLAG_FEEDBACK)
 		pfr_reset_feedback(addr, size);
 	pfr_destroy_ktable(tmpkt, 0);
 	return (rv);
 }
 
 int
 pfr_del_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
     int *ndel, int flags)
 {
 	struct pfr_ktable	*kt;
 	struct pfr_kentryworkq	 workq;
 	struct pfr_kentry	*p;
 	struct pfr_addr		*ad;
 	int			 i, rv, xdel = 0, log = 1;
 
 	PF_RULES_WASSERT();
 
 	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_FEEDBACK);
 	if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL))
 		return (EINVAL);
 	kt = pfr_lookup_table(tbl);
 	if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
 		return (ESRCH);
 	if (kt->pfrkt_flags & PFR_TFLAG_CONST)
 		return (EPERM);
 	/*
 	 * there are two algorithms to choose from here.
 	 * with:
 	 *   n: number of addresses to delete
 	 *   N: number of addresses in the table
 	 *
 	 * one is O(N) and is better for large 'n'
 	 * one is O(n*LOG(N)) and is better for small 'n'
 	 *
 	 * following code try to decide which one is best.
 	 */
 	for (i = kt->pfrkt_cnt; i > 0; i >>= 1)
 		log++;
 	if (size > kt->pfrkt_cnt/log) {
 		/* full table scan */
 		pfr_mark_addrs(kt);
 	} else {
 		/* iterate over addresses to delete */
 		for (i = 0, ad = addr; i < size; i++, ad++) {
 			if (pfr_validate_addr(ad))
 				return (EINVAL);
 			p = pfr_lookup_addr(kt, ad, 1);
 			if (p != NULL)
 				p->pfrke_mark = 0;
 		}
 	}
 	SLIST_INIT(&workq);
 	for (i = 0, ad = addr; i < size; i++, ad++) {
 		if (pfr_validate_addr(ad))
 			senderr(EINVAL);
 		p = pfr_lookup_addr(kt, ad, 1);
 		if (flags & PFR_FLAG_FEEDBACK) {
 			if (p == NULL)
 				ad->pfra_fback = PFR_FB_NONE;
 			else if (p->pfrke_not != ad->pfra_not)
 				ad->pfra_fback = PFR_FB_CONFLICT;
 			else if (p->pfrke_mark)
 				ad->pfra_fback = PFR_FB_DUPLICATE;
 			else
 				ad->pfra_fback = PFR_FB_DELETED;
 		}
 		if (p != NULL && p->pfrke_not == ad->pfra_not &&
 		    !p->pfrke_mark) {
 			p->pfrke_mark = 1;
 			SLIST_INSERT_HEAD(&workq, p, pfrke_workq);
 			xdel++;
 		}
 	}
 	if (!(flags & PFR_FLAG_DUMMY))
 		pfr_remove_kentries(kt, &workq);
 	if (ndel != NULL)
 		*ndel = xdel;
 	return (0);
 _bad:
 	if (flags & PFR_FLAG_FEEDBACK)
 		pfr_reset_feedback(addr, size);
 	return (rv);
 }
 
 int
 pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
     int *size2, int *nadd, int *ndel, int *nchange, int flags,
     u_int32_t ignore_pfrt_flags)
 {
 	struct pfr_ktable	*kt, *tmpkt;
 	struct pfr_kentryworkq	 addq, delq, changeq;
 	struct pfr_kentry	*p, *q;
 	struct pfr_addr		 ad;
 	int			 i, rv, xadd = 0, xdel = 0, xchange = 0;
 	long			 tzero = time_second;
 
 	PF_RULES_WASSERT();
 
 	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_FEEDBACK);
 	if (pfr_validate_table(tbl, ignore_pfrt_flags, flags &
 	    PFR_FLAG_USERIOCTL))
 		return (EINVAL);
 	kt = pfr_lookup_table(tbl);
 	if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
 		return (ESRCH);
 	if (kt->pfrkt_flags & PFR_TFLAG_CONST)
 		return (EPERM);
 	tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0);
 	if (tmpkt == NULL)
 		return (ENOMEM);
 	pfr_mark_addrs(kt);
 	SLIST_INIT(&addq);
 	SLIST_INIT(&delq);
 	SLIST_INIT(&changeq);
 	for (i = 0; i < size; i++) {
 		/*
 		 * XXXGL: undertand pf_if usage of this function
 		 * and make ad a moving pointer
 		 */
 		bcopy(addr + i, &ad, sizeof(ad));
 		if (pfr_validate_addr(&ad))
 			senderr(EINVAL);
 		ad.pfra_fback = PFR_FB_NONE;
 		p = pfr_lookup_addr(kt, &ad, 1);
 		if (p != NULL) {
 			if (p->pfrke_mark) {
 				ad.pfra_fback = PFR_FB_DUPLICATE;
 				goto _skip;
 			}
 			p->pfrke_mark = 1;
 			if (p->pfrke_not != ad.pfra_not) {
 				SLIST_INSERT_HEAD(&changeq, p, pfrke_workq);
 				ad.pfra_fback = PFR_FB_CHANGED;
 				xchange++;
 			}
 		} else {
 			q = pfr_lookup_addr(tmpkt, &ad, 1);
 			if (q != NULL) {
 				ad.pfra_fback = PFR_FB_DUPLICATE;
 				goto _skip;
 			}
 			p = pfr_create_kentry(&ad);
 			if (p == NULL)
 				senderr(ENOMEM);
 			if (pfr_route_kentry(tmpkt, p)) {
 				pfr_destroy_kentry(p);
 				ad.pfra_fback = PFR_FB_NONE;
 			} else {
 				SLIST_INSERT_HEAD(&addq, p, pfrke_workq);
 				ad.pfra_fback = PFR_FB_ADDED;
 				xadd++;
 			}
 		}
 _skip:
 		if (flags & PFR_FLAG_FEEDBACK)
 			bcopy(&ad, addr + i, sizeof(ad));
 	}
 	pfr_enqueue_addrs(kt, &delq, &xdel, ENQUEUE_UNMARKED_ONLY);
 	if ((flags & PFR_FLAG_FEEDBACK) && *size2) {
 		if (*size2 < size+xdel) {
 			*size2 = size+xdel;
 			senderr(0);
 		}
 		i = 0;
 		SLIST_FOREACH(p, &delq, pfrke_workq) {
 			pfr_copyout_addr(&ad, p);
 			ad.pfra_fback = PFR_FB_DELETED;
 			bcopy(&ad, addr + size + i, sizeof(ad));
 			i++;
 		}
 	}
 	pfr_clean_node_mask(tmpkt, &addq);
 	if (!(flags & PFR_FLAG_DUMMY)) {
 		pfr_insert_kentries(kt, &addq, tzero);
 		pfr_remove_kentries(kt, &delq);
 		pfr_clstats_kentries(&changeq, tzero, INVERT_NEG_FLAG);
 	} else
 		pfr_destroy_kentries(&addq);
 	if (nadd != NULL)
 		*nadd = xadd;
 	if (ndel != NULL)
 		*ndel = xdel;
 	if (nchange != NULL)
 		*nchange = xchange;
 	if ((flags & PFR_FLAG_FEEDBACK) && size2)
 		*size2 = size+xdel;
 	pfr_destroy_ktable(tmpkt, 0);
 	return (0);
 _bad:
 	pfr_clean_node_mask(tmpkt, &addq);
 	pfr_destroy_kentries(&addq);
 	if (flags & PFR_FLAG_FEEDBACK)
 		pfr_reset_feedback(addr, size);
 	pfr_destroy_ktable(tmpkt, 0);
 	return (rv);
 }
 
 int
 pfr_tst_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
 	int *nmatch, int flags)
 {
 	struct pfr_ktable	*kt;
 	struct pfr_kentry	*p;
 	struct pfr_addr		*ad;
 	int			 i, xmatch = 0;
 
 	PF_RULES_RASSERT();
 
 	ACCEPT_FLAGS(flags, PFR_FLAG_REPLACE);
 	if (pfr_validate_table(tbl, 0, 0))
 		return (EINVAL);
 	kt = pfr_lookup_table(tbl);
 	if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
 		return (ESRCH);
 
 	for (i = 0, ad = addr; i < size; i++, ad++) {
 		if (pfr_validate_addr(ad))
 			return (EINVAL);
 		if (ADDR_NETWORK(ad))
 			return (EINVAL);
 		p = pfr_lookup_addr(kt, ad, 0);
 		if (flags & PFR_FLAG_REPLACE)
 			pfr_copyout_addr(ad, p);
 		ad->pfra_fback = (p == NULL) ? PFR_FB_NONE :
 		    (p->pfrke_not ? PFR_FB_NOTMATCH : PFR_FB_MATCH);
 		if (p != NULL && !p->pfrke_not)
 			xmatch++;
 	}
 	if (nmatch != NULL)
 		*nmatch = xmatch;
 	return (0);
 }
 
 int
 pfr_get_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int *size,
 	int flags)
 {
 	struct pfr_ktable	*kt;
 	struct pfr_walktree	 w;
 	int			 rv;
 
 	PF_RULES_RASSERT();
 
 	ACCEPT_FLAGS(flags, 0);
 	if (pfr_validate_table(tbl, 0, 0))
 		return (EINVAL);
 	kt = pfr_lookup_table(tbl);
 	if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
 		return (ESRCH);
 	if (kt->pfrkt_cnt > *size) {
 		*size = kt->pfrkt_cnt;
 		return (0);
 	}
 
 	bzero(&w, sizeof(w));
 	w.pfrw_op = PFRW_GET_ADDRS;
 	w.pfrw_addr = addr;
 	w.pfrw_free = kt->pfrkt_cnt;
 	rv = kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, &w);
 	if (!rv)
 		rv = kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree,
 		    &w);
 	if (rv)
 		return (rv);
 
 	KASSERT(w.pfrw_free == 0, ("%s: corruption detected (%d)", __func__,
 	    w.pfrw_free));
 
 	*size = kt->pfrkt_cnt;
 	return (0);
 }
 
 int
 pfr_get_astats(struct pfr_table *tbl, struct pfr_astats *addr, int *size,
 	int flags)
 {
 	struct pfr_ktable	*kt;
 	struct pfr_walktree	 w;
 	struct pfr_kentryworkq	 workq;
 	int			 rv;
 	long			 tzero = time_second;
 
 	PF_RULES_RASSERT();
 
 	/* XXX PFR_FLAG_CLSTATS disabled */
 	ACCEPT_FLAGS(flags, 0);
 	if (pfr_validate_table(tbl, 0, 0))
 		return (EINVAL);
 	kt = pfr_lookup_table(tbl);
 	if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
 		return (ESRCH);
 	if (kt->pfrkt_cnt > *size) {
 		*size = kt->pfrkt_cnt;
 		return (0);
 	}
 
 	bzero(&w, sizeof(w));
 	w.pfrw_op = PFRW_GET_ASTATS;
 	w.pfrw_astats = addr;
 	w.pfrw_free = kt->pfrkt_cnt;
 	rv = kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, &w);
 	if (!rv)
 		rv = kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree,
 		    &w);
 	if (!rv && (flags & PFR_FLAG_CLSTATS)) {
 		pfr_enqueue_addrs(kt, &workq, NULL, 0);
 		pfr_clstats_kentries(&workq, tzero, 0);
 	}
 	if (rv)
 		return (rv);
 
 	if (w.pfrw_free) {
 		printf("pfr_get_astats: corruption detected (%d).\n",
 		    w.pfrw_free);
 		return (ENOTTY);
 	}
 	*size = kt->pfrkt_cnt;
 	return (0);
 }
 
 int
 pfr_clr_astats(struct pfr_table *tbl, struct pfr_addr *addr, int size,
     int *nzero, int flags)
 {
 	struct pfr_ktable	*kt;
 	struct pfr_kentryworkq	 workq;
 	struct pfr_kentry	*p;
 	struct pfr_addr		*ad;
 	int			 i, rv, xzero = 0;
 
 	PF_RULES_WASSERT();
 
 	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_FEEDBACK);
 	if (pfr_validate_table(tbl, 0, 0))
 		return (EINVAL);
 	kt = pfr_lookup_table(tbl);
 	if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
 		return (ESRCH);
 	SLIST_INIT(&workq);
 	for (i = 0, ad = addr; i < size; i++, ad++) {
 		if (pfr_validate_addr(ad))
 			senderr(EINVAL);
 		p = pfr_lookup_addr(kt, ad, 1);
 		if (flags & PFR_FLAG_FEEDBACK) {
 			ad->pfra_fback = (p != NULL) ?
 			    PFR_FB_CLEARED : PFR_FB_NONE;
 		}
 		if (p != NULL) {
 			SLIST_INSERT_HEAD(&workq, p, pfrke_workq);
 			xzero++;
 		}
 	}
 
 	if (!(flags & PFR_FLAG_DUMMY))
 		pfr_clstats_kentries(&workq, 0, 0);
 	if (nzero != NULL)
 		*nzero = xzero;
 	return (0);
 _bad:
 	if (flags & PFR_FLAG_FEEDBACK)
 		pfr_reset_feedback(addr, size);
 	return (rv);
 }
 
 static int
 pfr_validate_addr(struct pfr_addr *ad)
 {
 	int i;
 
 	switch (ad->pfra_af) {
 #ifdef INET
 	case AF_INET:
 		if (ad->pfra_net > 32)
 			return (-1);
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		if (ad->pfra_net > 128)
 			return (-1);
 		break;
 #endif /* INET6 */
 	default:
 		return (-1);
 	}
 	if (ad->pfra_net < 128 &&
 		(((caddr_t)ad)[ad->pfra_net/8] & (0xFF >> (ad->pfra_net%8))))
 			return (-1);
 	for (i = (ad->pfra_net+7)/8; i < sizeof(ad->pfra_u); i++)
 		if (((caddr_t)ad)[i])
 			return (-1);
 	if (ad->pfra_not && ad->pfra_not != 1)
 		return (-1);
 	if (ad->pfra_fback)
 		return (-1);
 	return (0);
 }
 
 static void
 pfr_enqueue_addrs(struct pfr_ktable *kt, struct pfr_kentryworkq *workq,
 	int *naddr, int sweep)
 {
 	struct pfr_walktree	w;
 
 	SLIST_INIT(workq);
 	bzero(&w, sizeof(w));
 	w.pfrw_op = sweep ? PFRW_SWEEP : PFRW_ENQUEUE;
 	w.pfrw_workq = workq;
 	if (kt->pfrkt_ip4 != NULL)
 		if (kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree,
 		    &w))
 			printf("pfr_enqueue_addrs: IPv4 walktree failed.\n");
 	if (kt->pfrkt_ip6 != NULL)
 		if (kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree,
 		    &w))
 			printf("pfr_enqueue_addrs: IPv6 walktree failed.\n");
 	if (naddr != NULL)
 		*naddr = w.pfrw_cnt;
 }
 
 static void
 pfr_mark_addrs(struct pfr_ktable *kt)
 {
 	struct pfr_walktree	w;
 
 	bzero(&w, sizeof(w));
 	w.pfrw_op = PFRW_MARK;
 	if (kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, &w))
 		printf("pfr_mark_addrs: IPv4 walktree failed.\n");
 	if (kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, &w))
 		printf("pfr_mark_addrs: IPv6 walktree failed.\n");
 }
 
 
 static struct pfr_kentry *
 pfr_lookup_addr(struct pfr_ktable *kt, struct pfr_addr *ad, int exact)
 {
 	union sockaddr_union	 sa, mask;
 	struct radix_node_head	*head = NULL;
 	struct pfr_kentry	*ke;
 
 	PF_RULES_ASSERT();
 
 	bzero(&sa, sizeof(sa));
 	if (ad->pfra_af == AF_INET) {
 		FILLIN_SIN(sa.sin, ad->pfra_ip4addr);
 		head = kt->pfrkt_ip4;
 	} else if ( ad->pfra_af == AF_INET6 ) {
 		FILLIN_SIN6(sa.sin6, ad->pfra_ip6addr);
 		head = kt->pfrkt_ip6;
 	}
 	if (ADDR_NETWORK(ad)) {
 		pfr_prepare_network(&mask, ad->pfra_af, ad->pfra_net);
 		ke = (struct pfr_kentry *)rn_lookup(&sa, &mask, head);
 		if (ke && KENTRY_RNF_ROOT(ke))
 			ke = NULL;
 	} else {
 		ke = (struct pfr_kentry *)rn_match(&sa, head);
 		if (ke && KENTRY_RNF_ROOT(ke))
 			ke = NULL;
 		if (exact && ke && KENTRY_NETWORK(ke))
 			ke = NULL;
 	}
 	return (ke);
 }
 
 static struct pfr_kentry *
 pfr_create_kentry(struct pfr_addr *ad)
 {
 	struct pfr_kentry	*ke;
 
 	ke =  uma_zalloc(V_pfr_kentry_z, M_NOWAIT | M_ZERO);
 	if (ke == NULL)
 		return (NULL);
 
 	if (ad->pfra_af == AF_INET)
 		FILLIN_SIN(ke->pfrke_sa.sin, ad->pfra_ip4addr);
 	else if (ad->pfra_af == AF_INET6)
 		FILLIN_SIN6(ke->pfrke_sa.sin6, ad->pfra_ip6addr);
 	ke->pfrke_af = ad->pfra_af;
 	ke->pfrke_net = ad->pfra_net;
 	ke->pfrke_not = ad->pfra_not;
 	return (ke);
 }
 
 static void
 pfr_destroy_kentries(struct pfr_kentryworkq *workq)
 {
 	struct pfr_kentry	*p, *q;
 
 	for (p = SLIST_FIRST(workq); p != NULL; p = q) {
 		q = SLIST_NEXT(p, pfrke_workq);
 		pfr_destroy_kentry(p);
 	}
 }
 
 static void
 pfr_destroy_kentry(struct pfr_kentry *ke)
 {
 	if (ke->pfrke_counters)
 		uma_zfree(V_pfr_kcounters_z, ke->pfrke_counters);
 	uma_zfree(V_pfr_kentry_z, ke);
 }
 
 static void
 pfr_insert_kentries(struct pfr_ktable *kt,
     struct pfr_kentryworkq *workq, long tzero)
 {
 	struct pfr_kentry	*p;
 	int			 rv, n = 0;
 
 	SLIST_FOREACH(p, workq, pfrke_workq) {
 		rv = pfr_route_kentry(kt, p);
 		if (rv) {
 			printf("pfr_insert_kentries: cannot route entry "
 			    "(code=%d).\n", rv);
 			break;
 		}
 		p->pfrke_tzero = tzero;
 		n++;
 	}
 	kt->pfrkt_cnt += n;
 }
 
 int
 pfr_insert_kentry(struct pfr_ktable *kt, struct pfr_addr *ad, long tzero)
 {
 	struct pfr_kentry	*p;
 	int			 rv;
 
 	p = pfr_lookup_addr(kt, ad, 1);
 	if (p != NULL)
 		return (0);
 	p = pfr_create_kentry(ad);
 	if (p == NULL)
 		return (ENOMEM);
 
 	rv = pfr_route_kentry(kt, p);
 	if (rv)
 		return (rv);
 
 	p->pfrke_tzero = tzero;
 	kt->pfrkt_cnt++;
 
 	return (0);
 }
 
 static void
 pfr_remove_kentries(struct pfr_ktable *kt,
     struct pfr_kentryworkq *workq)
 {
 	struct pfr_kentry	*p;
 	int			 n = 0;
 
 	SLIST_FOREACH(p, workq, pfrke_workq) {
 		pfr_unroute_kentry(kt, p);
 		n++;
 	}
 	kt->pfrkt_cnt -= n;
 	pfr_destroy_kentries(workq);
 }
 
 static void
 pfr_clean_node_mask(struct pfr_ktable *kt,
     struct pfr_kentryworkq *workq)
 {
 	struct pfr_kentry	*p;
 
 	SLIST_FOREACH(p, workq, pfrke_workq)
 		pfr_unroute_kentry(kt, p);
 }
 
 static void
 pfr_clstats_kentries(struct pfr_kentryworkq *workq, long tzero, int negchange)
 {
 	struct pfr_kentry	*p;
 
 	SLIST_FOREACH(p, workq, pfrke_workq) {
 		if (negchange)
 			p->pfrke_not = !p->pfrke_not;
 		if (p->pfrke_counters) {
 			uma_zfree(V_pfr_kcounters_z, p->pfrke_counters);
 			p->pfrke_counters = NULL;
 		}
 		p->pfrke_tzero = tzero;
 	}
 }
 
 static void
 pfr_reset_feedback(struct pfr_addr *addr, int size)
 {
 	struct pfr_addr	*ad;
 	int		i;
 
 	for (i = 0, ad = addr; i < size; i++, ad++)
 		ad->pfra_fback = PFR_FB_NONE;
 }
 
 static void
 pfr_prepare_network(union sockaddr_union *sa, int af, int net)
 {
 	int	i;
 
 	bzero(sa, sizeof(*sa));
 	if (af == AF_INET) {
 		sa->sin.sin_len = sizeof(sa->sin);
 		sa->sin.sin_family = AF_INET;
 		sa->sin.sin_addr.s_addr = net ? htonl(-1 << (32-net)) : 0;
 	} else if (af == AF_INET6) {
 		sa->sin6.sin6_len = sizeof(sa->sin6);
 		sa->sin6.sin6_family = AF_INET6;
 		for (i = 0; i < 4; i++) {
 			if (net <= 32) {
 				sa->sin6.sin6_addr.s6_addr32[i] =
 				    net ? htonl(-1 << (32-net)) : 0;
 				break;
 			}
 			sa->sin6.sin6_addr.s6_addr32[i] = 0xFFFFFFFF;
 			net -= 32;
 		}
 	}
 }
 
 static int
 pfr_route_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke)
 {
 	union sockaddr_union	 mask;
 	struct radix_node	*rn;
 	struct radix_node_head	*head = NULL;
 
 	PF_RULES_WASSERT();
 
 	bzero(ke->pfrke_node, sizeof(ke->pfrke_node));
 	if (ke->pfrke_af == AF_INET)
 		head = kt->pfrkt_ip4;
 	else if (ke->pfrke_af == AF_INET6)
 		head = kt->pfrkt_ip6;
 
 	if (KENTRY_NETWORK(ke)) {
 		pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net);
 		rn = rn_addroute(&ke->pfrke_sa, &mask, head, ke->pfrke_node);
 	} else
 		rn = rn_addroute(&ke->pfrke_sa, NULL, head, ke->pfrke_node);
 
 	return (rn == NULL ? -1 : 0);
 }
 
 static int
 pfr_unroute_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke)
 {
 	union sockaddr_union	 mask;
 	struct radix_node	*rn;
 	struct radix_node_head	*head = NULL;
 
 	if (ke->pfrke_af == AF_INET)
 		head = kt->pfrkt_ip4;
 	else if (ke->pfrke_af == AF_INET6)
 		head = kt->pfrkt_ip6;
 
 	if (KENTRY_NETWORK(ke)) {
 		pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net);
 		rn = rn_delete(&ke->pfrke_sa, &mask, head);
 	} else
 		rn = rn_delete(&ke->pfrke_sa, NULL, head);
 
 	if (rn == NULL) {
 		printf("pfr_unroute_kentry: delete failed.\n");
 		return (-1);
 	}
 	return (0);
 }
 
 static void
 pfr_copyout_addr(struct pfr_addr *ad, struct pfr_kentry *ke)
 {
 	bzero(ad, sizeof(*ad));
 	if (ke == NULL)
 		return;
 	ad->pfra_af = ke->pfrke_af;
 	ad->pfra_net = ke->pfrke_net;
 	ad->pfra_not = ke->pfrke_not;
 	if (ad->pfra_af == AF_INET)
 		ad->pfra_ip4addr = ke->pfrke_sa.sin.sin_addr;
 	else if (ad->pfra_af == AF_INET6)
 		ad->pfra_ip6addr = ke->pfrke_sa.sin6.sin6_addr;
 }
 
 static int
 pfr_walktree(struct radix_node *rn, void *arg)
 {
 	struct pfr_kentry	*ke = (struct pfr_kentry *)rn;
 	struct pfr_walktree	*w = arg;
 
 	switch (w->pfrw_op) {
 	case PFRW_MARK:
 		ke->pfrke_mark = 0;
 		break;
 	case PFRW_SWEEP:
 		if (ke->pfrke_mark)
 			break;
 		/* FALLTHROUGH */
 	case PFRW_ENQUEUE:
 		SLIST_INSERT_HEAD(w->pfrw_workq, ke, pfrke_workq);
 		w->pfrw_cnt++;
 		break;
 	case PFRW_GET_ADDRS:
 		if (w->pfrw_free-- > 0) {
 			pfr_copyout_addr(w->pfrw_addr, ke);
 			w->pfrw_addr++;
 		}
 		break;
 	case PFRW_GET_ASTATS:
 		if (w->pfrw_free-- > 0) {
 			struct pfr_astats as;
 
 			pfr_copyout_addr(&as.pfras_a, ke);
 
 			if (ke->pfrke_counters) {
 				bcopy(ke->pfrke_counters->pfrkc_packets,
 				    as.pfras_packets, sizeof(as.pfras_packets));
 				bcopy(ke->pfrke_counters->pfrkc_bytes,
 				    as.pfras_bytes, sizeof(as.pfras_bytes));
 			} else {
 				bzero(as.pfras_packets, sizeof(as.pfras_packets));
 				bzero(as.pfras_bytes, sizeof(as.pfras_bytes));
 				as.pfras_a.pfra_fback = PFR_FB_NOCOUNT;
 			}
 			as.pfras_tzero = ke->pfrke_tzero;
 
 			bcopy(&as, w->pfrw_astats, sizeof(as));
 			w->pfrw_astats++;
 		}
 		break;
 	case PFRW_POOL_GET:
 		if (ke->pfrke_not)
 			break; /* negative entries are ignored */
 		if (!w->pfrw_cnt--) {
 			w->pfrw_kentry = ke;
 			return (1); /* finish search */
 		}
 		break;
 	case PFRW_DYNADDR_UPDATE:
 	    {
 		union sockaddr_union	pfr_mask;
 
 		if (ke->pfrke_af == AF_INET) {
 			if (w->pfrw_dyn->pfid_acnt4++ > 0)
 				break;
 			pfr_prepare_network(&pfr_mask, AF_INET, ke->pfrke_net);
 			w->pfrw_dyn->pfid_addr4 = *SUNION2PF(&ke->pfrke_sa,
 			    AF_INET);
 			w->pfrw_dyn->pfid_mask4 = *SUNION2PF(&pfr_mask,
 			    AF_INET);
 		} else if (ke->pfrke_af == AF_INET6){
 			if (w->pfrw_dyn->pfid_acnt6++ > 0)
 				break;
 			pfr_prepare_network(&pfr_mask, AF_INET6, ke->pfrke_net);
 			w->pfrw_dyn->pfid_addr6 = *SUNION2PF(&ke->pfrke_sa,
 			    AF_INET6);
 			w->pfrw_dyn->pfid_mask6 = *SUNION2PF(&pfr_mask,
 			    AF_INET6);
 		}
 		break;
 	    }
 	}
 	return (0);
 }
 
 int
 pfr_clr_tables(struct pfr_table *filter, int *ndel, int flags)
 {
 	struct pfr_ktableworkq	 workq;
 	struct pfr_ktable	*p;
 	int			 xdel = 0;
 
 	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_ALLRSETS);
 	if (pfr_fix_anchor(filter->pfrt_anchor))
 		return (EINVAL);
 	if (pfr_table_count(filter, flags) < 0)
 		return (ENOENT);
 
 	SLIST_INIT(&workq);
 	RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) {
 		if (pfr_skip_table(filter, p, flags))
 			continue;
 		if (!strcmp(p->pfrkt_anchor, PF_RESERVED_ANCHOR))
 			continue;
 		if (!(p->pfrkt_flags & PFR_TFLAG_ACTIVE))
 			continue;
 		p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_ACTIVE;
 		SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
 		xdel++;
 	}
 	if (!(flags & PFR_FLAG_DUMMY))
 		pfr_setflags_ktables(&workq);
 	if (ndel != NULL)
 		*ndel = xdel;
 	return (0);
 }
 
 int
 pfr_add_tables(struct pfr_table *tbl, int size, int *nadd, int flags)
 {
 	struct pfr_ktableworkq	 addq, changeq;
 	struct pfr_ktable	*p, *q, *r, key;
 	int			 i, rv, xadd = 0;
 	long			 tzero = time_second;
 
 	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY);
 	SLIST_INIT(&addq);
 	SLIST_INIT(&changeq);
 	for (i = 0; i < size; i++) {
 		bcopy(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t));
 		if (pfr_validate_table(&key.pfrkt_t, PFR_TFLAG_USRMASK,
 		    flags & PFR_FLAG_USERIOCTL))
 			senderr(EINVAL);
 		key.pfrkt_flags |= PFR_TFLAG_ACTIVE;
 		p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key);
 		if (p == NULL) {
 			p = pfr_create_ktable(&key.pfrkt_t, tzero, 1);
 			if (p == NULL)
 				senderr(ENOMEM);
 			SLIST_FOREACH(q, &addq, pfrkt_workq) {
 				if (!pfr_ktable_compare(p, q))
 					goto _skip;
 			}
 			SLIST_INSERT_HEAD(&addq, p, pfrkt_workq);
 			xadd++;
 			if (!key.pfrkt_anchor[0])
 				goto _skip;
 
 			/* find or create root table */
 			bzero(key.pfrkt_anchor, sizeof(key.pfrkt_anchor));
 			r = RB_FIND(pfr_ktablehead, &pfr_ktables, &key);
 			if (r != NULL) {
 				p->pfrkt_root = r;
 				goto _skip;
 			}
 			SLIST_FOREACH(q, &addq, pfrkt_workq) {
 				if (!pfr_ktable_compare(&key, q)) {
 					p->pfrkt_root = q;
 					goto _skip;
 				}
 			}
 			key.pfrkt_flags = 0;
 			r = pfr_create_ktable(&key.pfrkt_t, 0, 1);
 			if (r == NULL)
 				senderr(ENOMEM);
 			SLIST_INSERT_HEAD(&addq, r, pfrkt_workq);
 			p->pfrkt_root = r;
 		} else if (!(p->pfrkt_flags & PFR_TFLAG_ACTIVE)) {
 			SLIST_FOREACH(q, &changeq, pfrkt_workq)
 				if (!pfr_ktable_compare(&key, q))
 					goto _skip;
 			p->pfrkt_nflags = (p->pfrkt_flags &
 			    ~PFR_TFLAG_USRMASK) | key.pfrkt_flags;
 			SLIST_INSERT_HEAD(&changeq, p, pfrkt_workq);
 			xadd++;
 		}
 _skip:
 	;
 	}
 	if (!(flags & PFR_FLAG_DUMMY)) {
 		pfr_insert_ktables(&addq);
 		pfr_setflags_ktables(&changeq);
 	} else
 		 pfr_destroy_ktables(&addq, 0);
 	if (nadd != NULL)
 		*nadd = xadd;
 	return (0);
 _bad:
 	pfr_destroy_ktables(&addq, 0);
 	return (rv);
 }
 
 int
 pfr_del_tables(struct pfr_table *tbl, int size, int *ndel, int flags)
 {
 	struct pfr_ktableworkq	 workq;
 	struct pfr_ktable	*p, *q, key;
 	int			 i, xdel = 0;
 
 	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY);
 	SLIST_INIT(&workq);
 	for (i = 0; i < size; i++) {
 		bcopy(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t));
 		if (pfr_validate_table(&key.pfrkt_t, 0,
 		    flags & PFR_FLAG_USERIOCTL))
 			return (EINVAL);
 		p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key);
 		if (p != NULL && (p->pfrkt_flags & PFR_TFLAG_ACTIVE)) {
 			SLIST_FOREACH(q, &workq, pfrkt_workq)
 				if (!pfr_ktable_compare(p, q))
 					goto _skip;
 			p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_ACTIVE;
 			SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
 			xdel++;
 		}
 _skip:
 	;
 	}
 
 	if (!(flags & PFR_FLAG_DUMMY))
 		pfr_setflags_ktables(&workq);
 	if (ndel != NULL)
 		*ndel = xdel;
 	return (0);
 }
 
 int
 pfr_get_tables(struct pfr_table *filter, struct pfr_table *tbl, int *size,
 	int flags)
 {
 	struct pfr_ktable	*p;
 	int			 n, nn;
 
 	PF_RULES_RASSERT();
 
 	ACCEPT_FLAGS(flags, PFR_FLAG_ALLRSETS);
 	if (pfr_fix_anchor(filter->pfrt_anchor))
 		return (EINVAL);
 	n = nn = pfr_table_count(filter, flags);
 	if (n < 0)
 		return (ENOENT);
 	if (n > *size) {
 		*size = n;
 		return (0);
 	}
 	RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) {
 		if (pfr_skip_table(filter, p, flags))
 			continue;
 		if (n-- <= 0)
 			continue;
 		bcopy(&p->pfrkt_t, tbl++, sizeof(*tbl));
 	}
 
 	KASSERT(n == 0, ("%s: corruption detected (%d)", __func__, n));
 
 	*size = nn;
 	return (0);
 }
 
 int
 pfr_get_tstats(struct pfr_table *filter, struct pfr_tstats *tbl, int *size,
 	int flags)
 {
 	struct pfr_ktable	*p;
 	struct pfr_ktableworkq	 workq;
 	int			 n, nn;
 	long			 tzero = time_second;
 
 	/* XXX PFR_FLAG_CLSTATS disabled */
 	ACCEPT_FLAGS(flags, PFR_FLAG_ALLRSETS);
 	if (pfr_fix_anchor(filter->pfrt_anchor))
 		return (EINVAL);
 	n = nn = pfr_table_count(filter, flags);
 	if (n < 0)
 		return (ENOENT);
 	if (n > *size) {
 		*size = n;
 		return (0);
 	}
 	SLIST_INIT(&workq);
 	RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) {
 		if (pfr_skip_table(filter, p, flags))
 			continue;
 		if (n-- <= 0)
 			continue;
 		bcopy(&p->pfrkt_ts, tbl++, sizeof(*tbl));
 		SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
 	}
 	if (flags & PFR_FLAG_CLSTATS)
 		pfr_clstats_ktables(&workq, tzero,
 		    flags & PFR_FLAG_ADDRSTOO);
 
 	KASSERT(n == 0, ("%s: corruption detected (%d)", __func__, n));
 
 	*size = nn;
 	return (0);
 }
 
 int
 pfr_clr_tstats(struct pfr_table *tbl, int size, int *nzero, int flags)
 {
 	struct pfr_ktableworkq	 workq;
 	struct pfr_ktable	*p, key;
 	int			 i, xzero = 0;
 	long			 tzero = time_second;
 
 	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_ADDRSTOO);
 	SLIST_INIT(&workq);
 	for (i = 0; i < size; i++) {
 		bcopy(tbl + i, &key.pfrkt_t, sizeof(key.pfrkt_t));
 		if (pfr_validate_table(&key.pfrkt_t, 0, 0))
 			return (EINVAL);
 		p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key);
 		if (p != NULL) {
 			SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
 			xzero++;
 		}
 	}
 	if (!(flags & PFR_FLAG_DUMMY))
 		pfr_clstats_ktables(&workq, tzero, flags & PFR_FLAG_ADDRSTOO);
 	if (nzero != NULL)
 		*nzero = xzero;
 	return (0);
 }
 
 int
 pfr_set_tflags(struct pfr_table *tbl, int size, int setflag, int clrflag,
 	int *nchange, int *ndel, int flags)
 {
 	struct pfr_ktableworkq	 workq;
 	struct pfr_ktable	*p, *q, key;
 	int			 i, xchange = 0, xdel = 0;
 
 	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY);
 	if ((setflag & ~PFR_TFLAG_USRMASK) ||
 	    (clrflag & ~PFR_TFLAG_USRMASK) ||
 	    (setflag & clrflag))
 		return (EINVAL);
 	SLIST_INIT(&workq);
 	for (i = 0; i < size; i++) {
 		bcopy(tbl + i, &key.pfrkt_t, sizeof(key.pfrkt_t));
 		if (pfr_validate_table(&key.pfrkt_t, 0,
 		    flags & PFR_FLAG_USERIOCTL))
 			return (EINVAL);
 		p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key);
 		if (p != NULL && (p->pfrkt_flags & PFR_TFLAG_ACTIVE)) {
 			p->pfrkt_nflags = (p->pfrkt_flags | setflag) &
 			    ~clrflag;
 			if (p->pfrkt_nflags == p->pfrkt_flags)
 				goto _skip;
 			SLIST_FOREACH(q, &workq, pfrkt_workq)
 				if (!pfr_ktable_compare(p, q))
 					goto _skip;
 			SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
 			if ((p->pfrkt_flags & PFR_TFLAG_PERSIST) &&
 			    (clrflag & PFR_TFLAG_PERSIST) &&
 			    !(p->pfrkt_flags & PFR_TFLAG_REFERENCED))
 				xdel++;
 			else
 				xchange++;
 		}
 _skip:
 	;
 	}
 	if (!(flags & PFR_FLAG_DUMMY))
 		pfr_setflags_ktables(&workq);
 	if (nchange != NULL)
 		*nchange = xchange;
 	if (ndel != NULL)
 		*ndel = xdel;
 	return (0);
 }
 
 int
 pfr_ina_begin(struct pfr_table *trs, u_int32_t *ticket, int *ndel, int flags)
 {
 	struct pfr_ktableworkq	 workq;
 	struct pfr_ktable	*p;
 	struct pf_ruleset	*rs;
 	int			 xdel = 0;
 
 	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY);
 	rs = pf_find_or_create_ruleset(trs->pfrt_anchor);
 	if (rs == NULL)
 		return (ENOMEM);
 	SLIST_INIT(&workq);
 	RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) {
 		if (!(p->pfrkt_flags & PFR_TFLAG_INACTIVE) ||
 		    pfr_skip_table(trs, p, 0))
 			continue;
 		p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_INACTIVE;
 		SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
 		xdel++;
 	}
 	if (!(flags & PFR_FLAG_DUMMY)) {
 		pfr_setflags_ktables(&workq);
 		if (ticket != NULL)
 			*ticket = ++rs->tticket;
 		rs->topen = 1;
 	} else
 		pf_remove_if_empty_ruleset(rs);
 	if (ndel != NULL)
 		*ndel = xdel;
 	return (0);
 }
 
 int
 pfr_ina_define(struct pfr_table *tbl, struct pfr_addr *addr, int size,
     int *nadd, int *naddr, u_int32_t ticket, int flags)
 {
 	struct pfr_ktableworkq	 tableq;
 	struct pfr_kentryworkq	 addrq;
 	struct pfr_ktable	*kt, *rt, *shadow, key;
 	struct pfr_kentry	*p;
 	struct pfr_addr		*ad;
 	struct pf_ruleset	*rs;
 	int			 i, rv, xadd = 0, xaddr = 0;
 
 	PF_RULES_WASSERT();
 
 	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_ADDRSTOO);
 	if (size && !(flags & PFR_FLAG_ADDRSTOO))
 		return (EINVAL);
 	if (pfr_validate_table(tbl, PFR_TFLAG_USRMASK,
 	    flags & PFR_FLAG_USERIOCTL))
 		return (EINVAL);
 	rs = pf_find_ruleset(tbl->pfrt_anchor);
 	if (rs == NULL || !rs->topen || ticket != rs->tticket)
 		return (EBUSY);
 	tbl->pfrt_flags |= PFR_TFLAG_INACTIVE;
 	SLIST_INIT(&tableq);
 	kt = RB_FIND(pfr_ktablehead, &pfr_ktables, (struct pfr_ktable *)tbl);
 	if (kt == NULL) {
 		kt = pfr_create_ktable(tbl, 0, 1);
 		if (kt == NULL)
 			return (ENOMEM);
 		SLIST_INSERT_HEAD(&tableq, kt, pfrkt_workq);
 		xadd++;
 		if (!tbl->pfrt_anchor[0])
 			goto _skip;
 
 		/* find or create root table */
 		bzero(&key, sizeof(key));
 		strlcpy(key.pfrkt_name, tbl->pfrt_name, sizeof(key.pfrkt_name));
 		rt = RB_FIND(pfr_ktablehead, &pfr_ktables, &key);
 		if (rt != NULL) {
 			kt->pfrkt_root = rt;
 			goto _skip;
 		}
 		rt = pfr_create_ktable(&key.pfrkt_t, 0, 1);
 		if (rt == NULL) {
 			pfr_destroy_ktables(&tableq, 0);
 			return (ENOMEM);
 		}
 		SLIST_INSERT_HEAD(&tableq, rt, pfrkt_workq);
 		kt->pfrkt_root = rt;
 	} else if (!(kt->pfrkt_flags & PFR_TFLAG_INACTIVE))
 		xadd++;
 _skip:
 	shadow = pfr_create_ktable(tbl, 0, 0);
 	if (shadow == NULL) {
 		pfr_destroy_ktables(&tableq, 0);
 		return (ENOMEM);
 	}
 	SLIST_INIT(&addrq);
 	for (i = 0, ad = addr; i < size; i++, ad++) {
 		if (pfr_validate_addr(ad))
 			senderr(EINVAL);
 		if (pfr_lookup_addr(shadow, ad, 1) != NULL)
 			continue;
 		p = pfr_create_kentry(ad);
 		if (p == NULL)
 			senderr(ENOMEM);
 		if (pfr_route_kentry(shadow, p)) {
 			pfr_destroy_kentry(p);
 			continue;
 		}
 		SLIST_INSERT_HEAD(&addrq, p, pfrke_workq);
 		xaddr++;
 	}
 	if (!(flags & PFR_FLAG_DUMMY)) {
 		if (kt->pfrkt_shadow != NULL)
 			pfr_destroy_ktable(kt->pfrkt_shadow, 1);
 		kt->pfrkt_flags |= PFR_TFLAG_INACTIVE;
 		pfr_insert_ktables(&tableq);
 		shadow->pfrkt_cnt = (flags & PFR_FLAG_ADDRSTOO) ?
 		    xaddr : NO_ADDRESSES;
 		kt->pfrkt_shadow = shadow;
 	} else {
 		pfr_clean_node_mask(shadow, &addrq);
 		pfr_destroy_ktable(shadow, 0);
 		pfr_destroy_ktables(&tableq, 0);
 		pfr_destroy_kentries(&addrq);
 	}
 	if (nadd != NULL)
 		*nadd = xadd;
 	if (naddr != NULL)
 		*naddr = xaddr;
 	return (0);
 _bad:
 	pfr_destroy_ktable(shadow, 0);
 	pfr_destroy_ktables(&tableq, 0);
 	pfr_destroy_kentries(&addrq);
 	return (rv);
 }
 
 int
 pfr_ina_rollback(struct pfr_table *trs, u_int32_t ticket, int *ndel, int flags)
 {
 	struct pfr_ktableworkq	 workq;
 	struct pfr_ktable	*p;
 	struct pf_ruleset	*rs;
 	int			 xdel = 0;
 
 	PF_RULES_WASSERT();
 
 	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY);
 	rs = pf_find_ruleset(trs->pfrt_anchor);
 	if (rs == NULL || !rs->topen || ticket != rs->tticket)
 		return (0);
 	SLIST_INIT(&workq);
 	RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) {
 		if (!(p->pfrkt_flags & PFR_TFLAG_INACTIVE) ||
 		    pfr_skip_table(trs, p, 0))
 			continue;
 		p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_INACTIVE;
 		SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
 		xdel++;
 	}
 	if (!(flags & PFR_FLAG_DUMMY)) {
 		pfr_setflags_ktables(&workq);
 		rs->topen = 0;
 		pf_remove_if_empty_ruleset(rs);
 	}
 	if (ndel != NULL)
 		*ndel = xdel;
 	return (0);
 }
 
 int
 pfr_ina_commit(struct pfr_table *trs, u_int32_t ticket, int *nadd,
     int *nchange, int flags)
 {
 	struct pfr_ktable	*p, *q;
 	struct pfr_ktableworkq	 workq;
 	struct pf_ruleset	*rs;
 	int			 xadd = 0, xchange = 0;
 	long			 tzero = time_second;
 
 	PF_RULES_WASSERT();
 
 	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY);
 	rs = pf_find_ruleset(trs->pfrt_anchor);
 	if (rs == NULL || !rs->topen || ticket != rs->tticket)
 		return (EBUSY);
 
 	SLIST_INIT(&workq);
 	RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) {
 		if (!(p->pfrkt_flags & PFR_TFLAG_INACTIVE) ||
 		    pfr_skip_table(trs, p, 0))
 			continue;
 		SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
 		if (p->pfrkt_flags & PFR_TFLAG_ACTIVE)
 			xchange++;
 		else
 			xadd++;
 	}
 
 	if (!(flags & PFR_FLAG_DUMMY)) {
 		for (p = SLIST_FIRST(&workq); p != NULL; p = q) {
 			q = SLIST_NEXT(p, pfrkt_workq);
 			pfr_commit_ktable(p, tzero);
 		}
 		rs->topen = 0;
 		pf_remove_if_empty_ruleset(rs);
 	}
 	if (nadd != NULL)
 		*nadd = xadd;
 	if (nchange != NULL)
 		*nchange = xchange;
 
 	return (0);
 }
 
 static void
 pfr_commit_ktable(struct pfr_ktable *kt, long tzero)
 {
 	struct pfr_ktable	*shadow = kt->pfrkt_shadow;
 	int			 nflags;
 
 	PF_RULES_WASSERT();
 
 	if (shadow->pfrkt_cnt == NO_ADDRESSES) {
 		if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
 			pfr_clstats_ktable(kt, tzero, 1);
 	} else if (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) {
 		/* kt might contain addresses */
 		struct pfr_kentryworkq	 addrq, addq, changeq, delq, garbageq;
 		struct pfr_kentry	*p, *q, *next;
 		struct pfr_addr		 ad;
 
 		pfr_enqueue_addrs(shadow, &addrq, NULL, 0);
 		pfr_mark_addrs(kt);
 		SLIST_INIT(&addq);
 		SLIST_INIT(&changeq);
 		SLIST_INIT(&delq);
 		SLIST_INIT(&garbageq);
 		pfr_clean_node_mask(shadow, &addrq);
 		for (p = SLIST_FIRST(&addrq); p != NULL; p = next) {
 			next = SLIST_NEXT(p, pfrke_workq);	/* XXX */
 			pfr_copyout_addr(&ad, p);
 			q = pfr_lookup_addr(kt, &ad, 1);
 			if (q != NULL) {
 				if (q->pfrke_not != p->pfrke_not)
 					SLIST_INSERT_HEAD(&changeq, q,
 					    pfrke_workq);
 				q->pfrke_mark = 1;
 				SLIST_INSERT_HEAD(&garbageq, p, pfrke_workq);
 			} else {
 				p->pfrke_tzero = tzero;
 				SLIST_INSERT_HEAD(&addq, p, pfrke_workq);
 			}
 		}
 		pfr_enqueue_addrs(kt, &delq, NULL, ENQUEUE_UNMARKED_ONLY);
 		pfr_insert_kentries(kt, &addq, tzero);
 		pfr_remove_kentries(kt, &delq);
 		pfr_clstats_kentries(&changeq, tzero, INVERT_NEG_FLAG);
 		pfr_destroy_kentries(&garbageq);
 	} else {
 		/* kt cannot contain addresses */
 		SWAP(struct radix_node_head *, kt->pfrkt_ip4,
 		    shadow->pfrkt_ip4);
 		SWAP(struct radix_node_head *, kt->pfrkt_ip6,
 		    shadow->pfrkt_ip6);
 		SWAP(int, kt->pfrkt_cnt, shadow->pfrkt_cnt);
 		pfr_clstats_ktable(kt, tzero, 1);
 	}
 	nflags = ((shadow->pfrkt_flags & PFR_TFLAG_USRMASK) |
 	    (kt->pfrkt_flags & PFR_TFLAG_SETMASK) | PFR_TFLAG_ACTIVE)
 		& ~PFR_TFLAG_INACTIVE;
 	pfr_destroy_ktable(shadow, 0);
 	kt->pfrkt_shadow = NULL;
 	pfr_setflags_ktable(kt, nflags);
 }
 
 static int
 pfr_validate_table(struct pfr_table *tbl, int allowedflags, int no_reserved)
 {
 	int i;
 
 	if (!tbl->pfrt_name[0])
 		return (-1);
 	if (no_reserved && !strcmp(tbl->pfrt_anchor, PF_RESERVED_ANCHOR))
 		 return (-1);
 	if (tbl->pfrt_name[PF_TABLE_NAME_SIZE-1])
 		return (-1);
 	for (i = strlen(tbl->pfrt_name); i < PF_TABLE_NAME_SIZE; i++)
 		if (tbl->pfrt_name[i])
 			return (-1);
 	if (pfr_fix_anchor(tbl->pfrt_anchor))
 		return (-1);
 	if (tbl->pfrt_flags & ~allowedflags)
 		return (-1);
 	return (0);
 }
 
 /*
  * Rewrite anchors referenced by tables to remove slashes
  * and check for validity.
  */
 static int
 pfr_fix_anchor(char *anchor)
 {
 	size_t siz = MAXPATHLEN;
 	int i;
 
 	if (anchor[0] == '/') {
 		char *path;
 		int off;
 
 		path = anchor;
 		off = 1;
 		while (*++path == '/')
 			off++;
 		bcopy(path, anchor, siz - off);
 		memset(anchor + siz - off, 0, off);
 	}
 	if (anchor[siz - 1])
 		return (-1);
 	for (i = strlen(anchor); i < siz; i++)
 		if (anchor[i])
 			return (-1);
 	return (0);
 }
 
 static int
 pfr_table_count(struct pfr_table *filter, int flags)
 {
 	struct pf_ruleset *rs;
 
 	PF_RULES_ASSERT();
 
 	if (flags & PFR_FLAG_ALLRSETS)
 		return (pfr_ktable_cnt);
 	if (filter->pfrt_anchor[0]) {
 		rs = pf_find_ruleset(filter->pfrt_anchor);
 		return ((rs != NULL) ? rs->tables : -1);
 	}
 	return (pf_main_ruleset.tables);
 }
 
 static int
 pfr_skip_table(struct pfr_table *filter, struct pfr_ktable *kt, int flags)
 {
 	if (flags & PFR_FLAG_ALLRSETS)
 		return (0);
 	if (strcmp(filter->pfrt_anchor, kt->pfrkt_anchor))
 		return (1);
 	return (0);
 }
 
 static void
 pfr_insert_ktables(struct pfr_ktableworkq *workq)
 {
 	struct pfr_ktable	*p;
 
 	SLIST_FOREACH(p, workq, pfrkt_workq)
 		pfr_insert_ktable(p);
 }
 
 static void
 pfr_insert_ktable(struct pfr_ktable *kt)
 {
 
 	PF_RULES_WASSERT();
 
 	RB_INSERT(pfr_ktablehead, &pfr_ktables, kt);
 	pfr_ktable_cnt++;
 	if (kt->pfrkt_root != NULL)
 		if (!kt->pfrkt_root->pfrkt_refcnt[PFR_REFCNT_ANCHOR]++)
 			pfr_setflags_ktable(kt->pfrkt_root,
 			    kt->pfrkt_root->pfrkt_flags|PFR_TFLAG_REFDANCHOR);
 }
 
 static void
 pfr_setflags_ktables(struct pfr_ktableworkq *workq)
 {
 	struct pfr_ktable	*p, *q;
 
 	for (p = SLIST_FIRST(workq); p; p = q) {
 		q = SLIST_NEXT(p, pfrkt_workq);
 		pfr_setflags_ktable(p, p->pfrkt_nflags);
 	}
 }
 
 static void
 pfr_setflags_ktable(struct pfr_ktable *kt, int newf)
 {
 	struct pfr_kentryworkq	addrq;
 
 	PF_RULES_WASSERT();
 
 	if (!(newf & PFR_TFLAG_REFERENCED) &&
 	    !(newf & PFR_TFLAG_PERSIST))
 		newf &= ~PFR_TFLAG_ACTIVE;
 	if (!(newf & PFR_TFLAG_ACTIVE))
 		newf &= ~PFR_TFLAG_USRMASK;
 	if (!(newf & PFR_TFLAG_SETMASK)) {
 		RB_REMOVE(pfr_ktablehead, &pfr_ktables, kt);
 		if (kt->pfrkt_root != NULL)
 			if (!--kt->pfrkt_root->pfrkt_refcnt[PFR_REFCNT_ANCHOR])
 				pfr_setflags_ktable(kt->pfrkt_root,
 				    kt->pfrkt_root->pfrkt_flags &
 					~PFR_TFLAG_REFDANCHOR);
 		pfr_destroy_ktable(kt, 1);
 		pfr_ktable_cnt--;
 		return;
 	}
 	if (!(newf & PFR_TFLAG_ACTIVE) && kt->pfrkt_cnt) {
 		pfr_enqueue_addrs(kt, &addrq, NULL, 0);
 		pfr_remove_kentries(kt, &addrq);
 	}
 	if (!(newf & PFR_TFLAG_INACTIVE) && kt->pfrkt_shadow != NULL) {
 		pfr_destroy_ktable(kt->pfrkt_shadow, 1);
 		kt->pfrkt_shadow = NULL;
 	}
 	kt->pfrkt_flags = newf;
 }
 
 static void
 pfr_clstats_ktables(struct pfr_ktableworkq *workq, long tzero, int recurse)
 {
 	struct pfr_ktable	*p;
 
 	SLIST_FOREACH(p, workq, pfrkt_workq)
 		pfr_clstats_ktable(p, tzero, recurse);
 }
 
 static void
 pfr_clstats_ktable(struct pfr_ktable *kt, long tzero, int recurse)
 {
 	struct pfr_kentryworkq	 addrq;
 
 	if (recurse) {
 		pfr_enqueue_addrs(kt, &addrq, NULL, 0);
 		pfr_clstats_kentries(&addrq, tzero, 0);
 	}
 	bzero(kt->pfrkt_packets, sizeof(kt->pfrkt_packets));
 	bzero(kt->pfrkt_bytes, sizeof(kt->pfrkt_bytes));
 	kt->pfrkt_match = kt->pfrkt_nomatch = 0;
 	kt->pfrkt_tzero = tzero;
 }
 
 static struct pfr_ktable *
 pfr_create_ktable(struct pfr_table *tbl, long tzero, int attachruleset)
 {
 	struct pfr_ktable	*kt;
 	struct pf_ruleset	*rs;
 
 	PF_RULES_WASSERT();
 
 	kt = malloc(sizeof(*kt), M_PFTABLE, M_NOWAIT|M_ZERO);
 	if (kt == NULL)
 		return (NULL);
 	kt->pfrkt_t = *tbl;
 
 	if (attachruleset) {
 		rs = pf_find_or_create_ruleset(tbl->pfrt_anchor);
 		if (!rs) {
 			pfr_destroy_ktable(kt, 0);
 			return (NULL);
 		}
 		kt->pfrkt_rs = rs;
 		rs->tables++;
 	}
 
 	if (!rn_inithead((void **)&kt->pfrkt_ip4,
 	    offsetof(struct sockaddr_in, sin_addr) * 8) ||
 	    !rn_inithead((void **)&kt->pfrkt_ip6,
 	    offsetof(struct sockaddr_in6, sin6_addr) * 8)) {
 		pfr_destroy_ktable(kt, 0);
 		return (NULL);
 	}
 	kt->pfrkt_tzero = tzero;
 
 	return (kt);
 }
 
 static void
 pfr_destroy_ktables(struct pfr_ktableworkq *workq, int flushaddr)
 {
 	struct pfr_ktable	*p, *q;
 
 	for (p = SLIST_FIRST(workq); p; p = q) {
 		q = SLIST_NEXT(p, pfrkt_workq);
 		pfr_destroy_ktable(p, flushaddr);
 	}
 }
 
 static void
 pfr_destroy_ktable(struct pfr_ktable *kt, int flushaddr)
 {
 	struct pfr_kentryworkq	 addrq;
 
 	if (flushaddr) {
 		pfr_enqueue_addrs(kt, &addrq, NULL, 0);
 		pfr_clean_node_mask(kt, &addrq);
 		pfr_destroy_kentries(&addrq);
 	}
-	if (kt->pfrkt_ip4 != NULL) {
-		RADIX_NODE_HEAD_DESTROY(kt->pfrkt_ip4);
+	if (kt->pfrkt_ip4 != NULL)
 		rn_detachhead((void **)&kt->pfrkt_ip4);
-	}
-	if (kt->pfrkt_ip6 != NULL) {
-		RADIX_NODE_HEAD_DESTROY(kt->pfrkt_ip6);
+	if (kt->pfrkt_ip6 != NULL)
 		rn_detachhead((void **)&kt->pfrkt_ip6);
-	}
 	if (kt->pfrkt_shadow != NULL)
 		pfr_destroy_ktable(kt->pfrkt_shadow, flushaddr);
 	if (kt->pfrkt_rs != NULL) {
 		kt->pfrkt_rs->tables--;
 		pf_remove_if_empty_ruleset(kt->pfrkt_rs);
 	}
 	free(kt, M_PFTABLE);
 }
 
 static int
 pfr_ktable_compare(struct pfr_ktable *p, struct pfr_ktable *q)
 {
 	int d;
 
 	if ((d = strncmp(p->pfrkt_name, q->pfrkt_name, PF_TABLE_NAME_SIZE)))
 		return (d);
 	return (strcmp(p->pfrkt_anchor, q->pfrkt_anchor));
 }
 
 static struct pfr_ktable *
 pfr_lookup_table(struct pfr_table *tbl)
 {
 	/* struct pfr_ktable start like a struct pfr_table */
 	return (RB_FIND(pfr_ktablehead, &pfr_ktables,
 	    (struct pfr_ktable *)tbl));
 }
 
 int
 pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af)
 {
 	struct pfr_kentry	*ke = NULL;
 	int			 match;
 
 	PF_RULES_RASSERT();
 
 	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
 		kt = kt->pfrkt_root;
 	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
 		return (0);
 
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 	    {
 		struct sockaddr_in sin;
 
 		bzero(&sin, sizeof(sin));
 		sin.sin_len = sizeof(sin);
 		sin.sin_family = AF_INET;
 		sin.sin_addr.s_addr = a->addr32[0];
 		ke = (struct pfr_kentry *)rn_match(&sin, kt->pfrkt_ip4);
 		if (ke && KENTRY_RNF_ROOT(ke))
 			ke = NULL;
 		break;
 	    }
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 	    {
 		struct sockaddr_in6 sin6;
 
 		bzero(&sin6, sizeof(sin6));
 		sin6.sin6_len = sizeof(sin6);
 		sin6.sin6_family = AF_INET6;
 		bcopy(a, &sin6.sin6_addr, sizeof(sin6.sin6_addr));
 		ke = (struct pfr_kentry *)rn_match(&sin6, kt->pfrkt_ip6);
 		if (ke && KENTRY_RNF_ROOT(ke))
 			ke = NULL;
 		break;
 	    }
 #endif /* INET6 */
 	}
 	match = (ke && !ke->pfrke_not);
 	if (match)
 		kt->pfrkt_match++;
 	else
 		kt->pfrkt_nomatch++;
 	return (match);
 }
 
 void
 pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af,
     u_int64_t len, int dir_out, int op_pass, int notrule)
 {
 	struct pfr_kentry	*ke = NULL;
 
 	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
 		kt = kt->pfrkt_root;
 	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
 		return;
 
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 	    {
 		struct sockaddr_in sin;
 
 		bzero(&sin, sizeof(sin));
 		sin.sin_len = sizeof(sin);
 		sin.sin_family = AF_INET;
 		sin.sin_addr.s_addr = a->addr32[0];
 		ke = (struct pfr_kentry *)rn_match(&sin, kt->pfrkt_ip4);
 		if (ke && KENTRY_RNF_ROOT(ke))
 			ke = NULL;
 		break;
 	    }
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 	    {
 		struct sockaddr_in6 sin6;
 
 		bzero(&sin6, sizeof(sin6));
 		sin6.sin6_len = sizeof(sin6);
 		sin6.sin6_family = AF_INET6;
 		bcopy(a, &sin6.sin6_addr, sizeof(sin6.sin6_addr));
 		ke = (struct pfr_kentry *)rn_match(&sin6, kt->pfrkt_ip6);
 		if (ke && KENTRY_RNF_ROOT(ke))
 			ke = NULL;
 		break;
 	    }
 #endif /* INET6 */
 	default:
 		panic("%s: unknown address family %u", __func__, af);
 	}
 	if ((ke == NULL || ke->pfrke_not) != notrule) {
 		if (op_pass != PFR_OP_PASS)
 			printf("pfr_update_stats: assertion failed.\n");
 		op_pass = PFR_OP_XPASS;
 	}
 	kt->pfrkt_packets[dir_out][op_pass]++;
 	kt->pfrkt_bytes[dir_out][op_pass] += len;
 	if (ke != NULL && op_pass != PFR_OP_XPASS &&
 	    (kt->pfrkt_flags & PFR_TFLAG_COUNTERS)) {
 		if (ke->pfrke_counters == NULL)
 			ke->pfrke_counters = uma_zalloc(V_pfr_kcounters_z,
 			    M_NOWAIT | M_ZERO);
 		if (ke->pfrke_counters != NULL) {
 			ke->pfrke_counters->pfrkc_packets[dir_out][op_pass]++;
 			ke->pfrke_counters->pfrkc_bytes[dir_out][op_pass] += len;
 		}
 	}
 }
 
 struct pfr_ktable *
 pfr_attach_table(struct pf_ruleset *rs, char *name)
 {
 	struct pfr_ktable	*kt, *rt;
 	struct pfr_table	 tbl;
 	struct pf_anchor	*ac = rs->anchor;
 
 	PF_RULES_WASSERT();
 
 	bzero(&tbl, sizeof(tbl));
 	strlcpy(tbl.pfrt_name, name, sizeof(tbl.pfrt_name));
 	if (ac != NULL)
 		strlcpy(tbl.pfrt_anchor, ac->path, sizeof(tbl.pfrt_anchor));
 	kt = pfr_lookup_table(&tbl);
 	if (kt == NULL) {
 		kt = pfr_create_ktable(&tbl, time_second, 1);
 		if (kt == NULL)
 			return (NULL);
 		if (ac != NULL) {
 			bzero(tbl.pfrt_anchor, sizeof(tbl.pfrt_anchor));
 			rt = pfr_lookup_table(&tbl);
 			if (rt == NULL) {
 				rt = pfr_create_ktable(&tbl, 0, 1);
 				if (rt == NULL) {
 					pfr_destroy_ktable(kt, 0);
 					return (NULL);
 				}
 				pfr_insert_ktable(rt);
 			}
 			kt->pfrkt_root = rt;
 		}
 		pfr_insert_ktable(kt);
 	}
 	if (!kt->pfrkt_refcnt[PFR_REFCNT_RULE]++)
 		pfr_setflags_ktable(kt, kt->pfrkt_flags|PFR_TFLAG_REFERENCED);
 	return (kt);
 }
 
 void
 pfr_detach_table(struct pfr_ktable *kt)
 {
 
 	PF_RULES_WASSERT();
 	KASSERT(kt->pfrkt_refcnt[PFR_REFCNT_RULE] > 0, ("%s: refcount %d\n",
 	    __func__, kt->pfrkt_refcnt[PFR_REFCNT_RULE]));
 
 	if (!--kt->pfrkt_refcnt[PFR_REFCNT_RULE])
 		pfr_setflags_ktable(kt, kt->pfrkt_flags&~PFR_TFLAG_REFERENCED);
 }
 
 int
 pfr_pool_get(struct pfr_ktable *kt, int *pidx, struct pf_addr *counter,
     sa_family_t af)
 {
 	struct pf_addr		 *addr, *cur, *mask;
 	union sockaddr_union	 uaddr, umask;
 	struct pfr_kentry	*ke, *ke2 = NULL;
 	int			 idx = -1, use_counter = 0;
 
 	switch (af) {
 	case AF_INET:
 		uaddr.sin.sin_len = sizeof(struct sockaddr_in);
 		uaddr.sin.sin_family = AF_INET;
 		break;
 	case AF_INET6:
 		uaddr.sin6.sin6_len = sizeof(struct sockaddr_in6);
 		uaddr.sin6.sin6_family = AF_INET6;
 		break;
 	}
 	addr = SUNION2PF(&uaddr, af);
 
 	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
 		kt = kt->pfrkt_root;
 	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
 		return (-1);
 
 	if (pidx != NULL)
 		idx = *pidx;
 	if (counter != NULL && idx >= 0)
 		use_counter = 1;
 	if (idx < 0)
 		idx = 0;
 
 _next_block:
 	ke = pfr_kentry_byidx(kt, idx, af);
 	if (ke == NULL) {
 		kt->pfrkt_nomatch++;
 		return (1);
 	}
 	pfr_prepare_network(&umask, af, ke->pfrke_net);
 	cur = SUNION2PF(&ke->pfrke_sa, af);
 	mask = SUNION2PF(&umask, af);
 
 	if (use_counter) {
 		/* is supplied address within block? */
 		if (!PF_MATCHA(0, cur, mask, counter, af)) {
 			/* no, go to next block in table */
 			idx++;
 			use_counter = 0;
 			goto _next_block;
 		}
 		PF_ACPY(addr, counter, af);
 	} else {
 		/* use first address of block */
 		PF_ACPY(addr, cur, af);
 	}
 
 	if (!KENTRY_NETWORK(ke)) {
 		/* this is a single IP address - no possible nested block */
 		PF_ACPY(counter, addr, af);
 		*pidx = idx;
 		kt->pfrkt_match++;
 		return (0);
 	}
 	for (;;) {
 		/* we don't want to use a nested block */
 		switch (af) {
 		case AF_INET:
 			ke2 = (struct pfr_kentry *)rn_match(&uaddr,
 			    kt->pfrkt_ip4);
 			break;
 		case AF_INET6:
 			ke2 = (struct pfr_kentry *)rn_match(&uaddr,
 			    kt->pfrkt_ip6);
 			break;
 		}
 		/* no need to check KENTRY_RNF_ROOT() here */
 		if (ke2 == ke) {
 			/* lookup return the same block - perfect */
 			PF_ACPY(counter, addr, af);
 			*pidx = idx;
 			kt->pfrkt_match++;
 			return (0);
 		}
 
 		/* we need to increase the counter past the nested block */
 		pfr_prepare_network(&umask, AF_INET, ke2->pfrke_net);
 		PF_POOLMASK(addr, addr, SUNION2PF(&umask, af), &pfr_ffaddr, af);
 		PF_AINC(addr, af);
 		if (!PF_MATCHA(0, cur, mask, addr, af)) {
 			/* ok, we reached the end of our main block */
 			/* go to next block in table */
 			idx++;
 			use_counter = 0;
 			goto _next_block;
 		}
 	}
 }
 
 static struct pfr_kentry *
 pfr_kentry_byidx(struct pfr_ktable *kt, int idx, int af)
 {
 	struct pfr_walktree	w;
 
 	bzero(&w, sizeof(w));
 	w.pfrw_op = PFRW_POOL_GET;
 	w.pfrw_cnt = idx;
 
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, &w);
 		return (w.pfrw_kentry);
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, &w);
 		return (w.pfrw_kentry);
 #endif /* INET6 */
 	default:
 		return (NULL);
 	}
 }
 
 void
 pfr_dynaddr_update(struct pfr_ktable *kt, struct pfi_dynaddr *dyn)
 {
 	struct pfr_walktree	w;
 
 	bzero(&w, sizeof(w));
 	w.pfrw_op = PFRW_DYNADDR_UPDATE;
 	w.pfrw_dyn = dyn;
 
 	dyn->pfid_acnt4 = 0;
 	dyn->pfid_acnt6 = 0;
 	if (!dyn->pfid_af || dyn->pfid_af == AF_INET)
 		kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, &w);
 	if (!dyn->pfid_af || dyn->pfid_af == AF_INET6)
 		kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, &w);
 }