Index: projects/pnet/sys/kern/kern_poll.c
===================================================================
--- projects/pnet/sys/kern/kern_poll.c	(revision 193105)
+++ projects/pnet/sys/kern/kern_poll.c	(revision 193106)
@@ -1,610 +1,610 @@
 /*-
  * Copyright (c) 2001-2002 Luigi Rizzo
  *
  * Supported by: the Xorp Project (www.xorp.org)
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_route.h"
 #include "opt_device_polling.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
 #include <sys/proc.h>
 #include <sys/resourcevar.h>
 #include <sys/socket.h>			/* needed by net/if.h		*/
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/vimage.h>
 
 #include <net/if.h>			/* for IFF_* flags		*/
 #include <net/netisr.h>			/* for NETISR_POLL		*/
 #include <net/route.h>
 #include <net/vnet.h>
 
 static int poll_switch(SYSCTL_HANDLER_ARGS);
 
 void hardclock_device_poll(void);	/* hook from hardclock		*/
 
 static struct mtx	poll_mtx;
 
 /*
  * Polling support for [network] device drivers.
  *
  * Drivers which support this feature can register with the
  * polling code.
  *
  * If registration is successful, the driver must disable interrupts,
  * and further I/O is performed through the handler, which is invoked
  * (at least once per clock tick) with 3 arguments: the "arg" passed at
  * register time (a struct ifnet pointer), a command, and a "count" limit.
  *
  * The command can be one of the following:
  *  POLL_ONLY: quick move of "count" packets from input/output queues.
  *  POLL_AND_CHECK_STATUS: as above, plus check status registers or do
  *	other more expensive operations. This command is issued periodically
  *	but less frequently than POLL_ONLY.
  *
  * The count limit specifies how much work the handler can do during the
  * call -- typically this is the number of packets to be received, or
  * transmitted, etc. (drivers are free to interpret this number, as long
  * as the max time spent in the function grows roughly linearly with the
  * count).
  *
  * Polling is enabled and disabled via setting IFCAP_POLLING flag on
  * the interface. The driver ioctl handler should register interface
  * with polling and disable interrupts, if registration was successful.
  *
  * A second variable controls the sharing of CPU between polling/kernel
  * network processing, and other activities (typically userlevel tasks):
  * kern.polling.user_frac (between 0 and 99, default 50) sets the share
  * of CPU allocated to user tasks. CPU is allocated proportionally to the
  * shares, by dynamically adjusting the "count" (poll_burst).
  *
  * Other parameters should be left to their default values.
  * The following constraints hold
  *
  *	1 <= poll_each_burst <= poll_burst <= poll_burst_max
  *	0 <= poll_each_burst
  *	MIN_POLL_BURST_MAX <= poll_burst_max <= MAX_POLL_BURST_MAX
  */
 
 #define MIN_POLL_BURST_MAX	10
 #define MAX_POLL_BURST_MAX	1000
 
 static uint32_t poll_burst = 5;
 static uint32_t poll_burst_max = 150;	/* good for 100Mbit net and HZ=1000 */
 static uint32_t poll_each_burst = 5;
 
 SYSCTL_NODE(_kern, OID_AUTO, polling, CTLFLAG_RW, 0,
 	"Device polling parameters");
 
 SYSCTL_UINT(_kern_polling, OID_AUTO, burst, CTLFLAG_RD,
 	&poll_burst, 0, "Current polling burst size");
 
 static int	netisr_poll_scheduled;
 static int	netisr_pollmore_scheduled;
 
 static int poll_burst_max_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	uint32_t val = poll_burst_max;
 	int error;
 
 	error = sysctl_handle_int(oidp, &val, 0, req);
 	if (error || !req->newptr )
 		return (error);
 	if (val < MIN_POLL_BURST_MAX || val > MAX_POLL_BURST_MAX)
 		return (EINVAL);
 
 	mtx_lock(&poll_mtx);
 	poll_burst_max = val;
 	if (poll_burst > poll_burst_max)
 		poll_burst = poll_burst_max;
 	if (poll_each_burst > poll_burst_max)
 		poll_each_burst = MIN_POLL_BURST_MAX;
 	mtx_unlock(&poll_mtx);
 
 	return (0);
 }
 SYSCTL_PROC(_kern_polling, OID_AUTO, burst_max, CTLTYPE_UINT | CTLFLAG_RW,
 	0, sizeof(uint32_t), poll_burst_max_sysctl, "I", "Max Polling burst size");
 
 static int poll_each_burst_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	uint32_t val = poll_each_burst;
 	int error;
 
 	error = sysctl_handle_int(oidp, &val, 0, req);
 	if (error || !req->newptr )
 		return (error);
 	if (val < 1)
 		return (EINVAL);
 
 	mtx_lock(&poll_mtx);
 	if (val > poll_burst_max) {
 		mtx_unlock(&poll_mtx);
 		return (EINVAL);
 	}
 	poll_each_burst = val;
 	mtx_unlock(&poll_mtx);
 
 	return (0);
 }
 SYSCTL_PROC(_kern_polling, OID_AUTO, each_burst, CTLTYPE_UINT | CTLFLAG_RW,
 	0, sizeof(uint32_t), poll_each_burst_sysctl, "I",
 	"Max size of each burst");
 
 static uint32_t poll_in_idle_loop=0;	/* do we poll in idle loop ? */
 SYSCTL_UINT(_kern_polling, OID_AUTO, idle_poll, CTLFLAG_RW,
 	&poll_in_idle_loop, 0, "Enable device polling in idle loop");
 
 static uint32_t user_frac = 50;
 /*
  * Sysctl handler for kern.polling.user_frac.
  *
  * Reports user_frac and, when a new value is supplied, stores it if it is
  * at most 99.  Returns 0 on success, EINVAL for a rejected value, or the
  * error from sysctl_handle_int().
  */
 static int user_frac_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	uint32_t val = user_frac;
 	int error;
 
 	error = sysctl_handle_int(oidp, &val, 0, req);
 	if (error || !req->newptr )
 		return (error);
 	/*
 	 * val is unsigned, so a "val < 0" test can never be true; only the
 	 * upper bound needs checking.
 	 */
 	if (val > 99)
 		return (EINVAL);
 
 	mtx_lock(&poll_mtx);
 	user_frac = val;
 	mtx_unlock(&poll_mtx);
 
 	return (0);
 }
 SYSCTL_PROC(_kern_polling, OID_AUTO, user_frac, CTLTYPE_UINT | CTLFLAG_RW,
 	0, sizeof(uint32_t), user_frac_sysctl, "I",
 	"Desired user fraction of cpu time");
 
 static uint32_t reg_frac_count = 0;
 static uint32_t reg_frac = 20 ;
 static int reg_frac_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	uint32_t val = reg_frac;
 	int error;
 
 	error = sysctl_handle_int(oidp, &val, 0, req);
 	if (error || !req->newptr )
 		return (error);
 	if (val < 1 || val > hz)
 		return (EINVAL);
 
 	mtx_lock(&poll_mtx);
 	reg_frac = val;
 	if (reg_frac_count >= reg_frac)
 		reg_frac_count = 0;
 	mtx_unlock(&poll_mtx);
 
 	return (0);
 }
 SYSCTL_PROC(_kern_polling, OID_AUTO, reg_frac, CTLTYPE_UINT | CTLFLAG_RW,
 	0, sizeof(uint32_t), reg_frac_sysctl, "I",
 	"Every this many cycles check registers");
 
 static uint32_t short_ticks;
 SYSCTL_UINT(_kern_polling, OID_AUTO, short_ticks, CTLFLAG_RD,
 	&short_ticks, 0, "Hardclock ticks shorter than they should be");
 
 static uint32_t lost_polls;
 SYSCTL_UINT(_kern_polling, OID_AUTO, lost_polls, CTLFLAG_RD,
 	&lost_polls, 0, "How many times we would have lost a poll tick");
 
 static uint32_t pending_polls;
 SYSCTL_UINT(_kern_polling, OID_AUTO, pending_polls, CTLFLAG_RD,
 	&pending_polls, 0, "Do we need to poll again");
 
 static int residual_burst = 0;
 SYSCTL_INT(_kern_polling, OID_AUTO, residual_burst, CTLFLAG_RD,
 	&residual_burst, 0, "# of residual cycles in burst");
 
 static uint32_t poll_handlers; /* next free entry in pr[]. */
 SYSCTL_UINT(_kern_polling, OID_AUTO, handlers, CTLFLAG_RD,
 	&poll_handlers, 0, "Number of registered poll handlers");
 
 static int polling = 0;
 SYSCTL_PROC(_kern_polling, OID_AUTO, enable, CTLTYPE_UINT | CTLFLAG_RW,
 	0, sizeof(int), poll_switch, "I", "Switch polling for all interfaces");
 
 static uint32_t phase;
 SYSCTL_UINT(_kern_polling, OID_AUTO, phase, CTLFLAG_RD,
 	&phase, 0, "Polling phase");
 
 static uint32_t suspect;
 SYSCTL_UINT(_kern_polling, OID_AUTO, suspect, CTLFLAG_RD,
 	&suspect, 0, "suspect event");
 
 static uint32_t stalled;
 SYSCTL_UINT(_kern_polling, OID_AUTO, stalled, CTLFLAG_RD,
 	&stalled, 0, "potential stalls");
 
 static uint32_t idlepoll_sleeping; /* idlepoll is sleeping */
 SYSCTL_UINT(_kern_polling, OID_AUTO, idlepoll_sleeping, CTLFLAG_RD,
 	&idlepoll_sleeping, 0, "idlepoll is sleeping");
 
 
 /* Capacity of the registration table pr[]. */
 #define POLL_LIST_LEN  128
 /*
  * One registered poll handler: the driver callback and the interface it
  * is invoked for, as handler(ifp, cmd, count).
  */
 struct pollrec {
 	poll_handler_t	*handler;
 	struct ifnet	*ifp;
 };
 
 /* Registration table; entries 0 .. poll_handlers - 1 are in use. */
 static struct pollrec pr[POLL_LIST_LEN];
 
 /*
  * Initialize poll_mtx as an MTX_DEF mutex named "polling".  Run through the
  * SYSINIT below at SI_SUB_CLOCKS / SI_ORDER_MIDDLE.
  */
 static void
 init_device_poll(void)
 {
 
 	mtx_init(&poll_mtx, "polling", NULL, MTX_DEF);
 }
 SYSINIT(device_poll, SI_SUB_CLOCKS, SI_ORDER_MIDDLE, init_device_poll, NULL);
 
 
 /*
  * Hook from hardclock. Tries to schedule a netisr, but keeps track
  * of lost ticks due to the previous handler taking too long.
  * Normally, this should not happen, because polling handler should
  * run for a short time. However, in some cases (e.g. when there are
  * changes in link status etc.) the drivers take a very long time
  * (even in the order of milliseconds) to reset and reconfigure the
  * device, causing apparent lost polls.
  *
  * The first part of the code is just for debugging purposes, and tries
  * to count how often hardclock ticks are shorter than they should,
  * meaning either stray interrupts or delayed events.
  */
 void
 hardclock_device_poll(void)
 {
 	/* Static, so prev_t persists between calls. */
 	static struct timeval prev_t, t;
 	int delta;
 
 	/* No handler registered: nothing to schedule. */
 	if (poll_handlers == 0)
 		return;
 
 	/*
 	 * Debugging aid: delta is the time in microseconds since prev_t.
 	 * An interval with delta * hz < 500000 (less than half a tick) is
 	 * counted in short_ticks; otherwise prev_t advances to now.
 	 * NOTE(review): delta is an int, so delta * hz looks like it can
 	 * overflow after a long interval (e.g. while prev_t is still
 	 * zero) -- confirm.
 	 */
 	microuptime(&t);
 	delta = (t.tv_usec - prev_t.tv_usec) +
 		(t.tv_sec - prev_t.tv_sec)*1000000;
 	if (delta * hz < 500000)
 		short_ticks++;
 	else
 		prev_t = t;
 
 	if (pending_polls > 100) {
 		/*
 		 * Too much, assume it has stalled (not always true
 		 * see comment above).
 		 */
 		stalled++;
 		pending_polls = 0;
 		phase = 0;
 	}
 
 	/*
 	 * With phase <= 2, mark both netisr_poll() and netisr_pollmore()
 	 * as scheduled and request a poll.  A non-zero phase at this point
 	 * is counted in "suspect".
 	 */
 	if (phase <= 2) {
 		if (phase != 0)
 			suspect++;
 		phase = 1;
 		netisr_poll_scheduled = 1;
 		netisr_pollmore_scheduled = 1;
-		netisr2_sched_poll();
+		netisr_sched_poll();
 		phase = 2;
 	}
 	/*
 	 * Account for this tick; if an earlier tick was still pending,
 	 * record a lost poll.
 	 */
 	if (pending_polls++ > 0)
 		lost_polls++;
 }
 
 /*
  * ether_poll is called from the idle loop.
  */
 static void
 ether_poll(int count)
 {
 	int i;
 
 	mtx_lock(&poll_mtx);
 
 	if (count > poll_each_burst)
 		count = poll_each_burst;
 
 	for (i = 0 ; i < poll_handlers ; i++)
 		pr[i].handler(pr[i].ifp, POLL_ONLY, count);
 
 	mtx_unlock(&poll_mtx);
 }
 
 /*
  * netisr_pollmore is called after other netisr's, possibly scheduling
  * another NETISR_POLL call, or adapting the burst size for the next cycle.
  *
  * It is very bad to fetch large bursts of packets from a single card at once,
  * because the burst could take a long time to be completely processed, or
  * could saturate the intermediate queue (ipintrq or similar) leading to
  * losses or unfairness. To reduce the problem, and also to account better for
  * time spent in network-related processing, we split the burst in smaller
  * chunks of fixed size, giving control to the other netisr's between chunks.
  * This helps in improving the fairness, reducing livelock (because we
  * emulate more closely the "process to completion" that we have with
  * fastforwarding) and accounting for the work performed in low level
  * handling and forwarding.
  */
 
 static struct timeval poll_start_t;
 
 void
 netisr_pollmore()
 {
 	struct timeval t;
 	int kern_load;
 
 	/* Do nothing unless a pollmore pass has actually been requested. */
 	mtx_lock(&poll_mtx);
 	if (!netisr_pollmore_scheduled) {
 		mtx_unlock(&poll_mtx);
 		return;
 	}
 	netisr_pollmore_scheduled = 0;
 	phase = 5;
 	/* Part of this tick's burst is still left: schedule another chunk. */
 	if (residual_burst > 0) {
 		netisr_poll_scheduled = 1;
 		netisr_pollmore_scheduled = 1;
-		netisr2_sched_poll();
+		netisr_sched_poll();
 		mtx_unlock(&poll_mtx);
 		/* will run immediately on return, followed by netisrs */
 		return;
 	}
 	/* here we can account time spent in netisr's in this tick */
 	microuptime(&t);
 	kern_load = (t.tv_usec - poll_start_t.tv_usec) +
 		(t.tv_sec - poll_start_t.tv_sec)*1000000;	/* us */
 	kern_load = (kern_load * hz) / 10000;			/* 0..100 */
 	/*
 	 * Adapt the burst size by one step: shrink it when the measured
 	 * load exceeds the share left over by user_frac, otherwise grow it
 	 * up to poll_burst_max.
 	 */
 	if (kern_load > (100 - user_frac)) { /* try decrease ticks */
 		if (poll_burst > 1)
 			poll_burst--;
 	} else {
 		if (poll_burst < poll_burst_max)
 			poll_burst++;
 	}
 
 	/* One pending tick has now been fully served. */
 	pending_polls--;
 	if (pending_polls == 0) /* we are done */
 		phase = 0;
 	else {
 		/*
 		 * Last cycle was long and caused us to miss one or more
 		 * hardclock ticks. Restart processing again, but slightly
 		 * reduce the burst size to prevent that this happens again.
 		 */
 		poll_burst -= (poll_burst / 8);
 		if (poll_burst < 1)
 			poll_burst = 1;
 		netisr_poll_scheduled = 1;
 		netisr_pollmore_scheduled = 1;
-		netisr2_sched_poll();
+		netisr_sched_poll();
 		phase = 6;
 	}
 	mtx_unlock(&poll_mtx);
 }
 
 /*
  * netisr_poll is typically scheduled once per tick.
  */
 void
 netisr_poll(void)
 {
 	int i, cycles;
 	enum poll_cmd arg = POLL_ONLY;
 
 	mtx_lock(&poll_mtx);
 	if (!netisr_poll_scheduled) {
 		mtx_unlock(&poll_mtx);
 		return;
 	}
 	netisr_poll_scheduled = 0;
 	phase = 3;
 	if (residual_burst == 0) { /* first call in this tick */
 		microuptime(&poll_start_t);
 		if (++reg_frac_count == reg_frac) {
 			arg = POLL_AND_CHECK_STATUS;
 			reg_frac_count = 0;
 		}
 
 		residual_burst = poll_burst;
 	}
 	cycles = (residual_burst < poll_each_burst) ?
 		residual_burst : poll_each_burst;
 	residual_burst -= cycles;
 
 	for (i = 0 ; i < poll_handlers ; i++)
 		pr[i].handler(pr[i].ifp, arg, cycles);
 
 	phase = 4;
 	mtx_unlock(&poll_mtx);
 }
 
 /*
  * Try to register routine for polling. Returns 0 if successful
  * (and polling should be enabled), error code otherwise.
  * A device is not supposed to register itself multiple times.
  *
  * This is called from within the *_ioctl() functions.
  */
 int
 ether_poll_register(poll_handler_t *h, struct ifnet *ifp)
 {
 	/* Number of "list full" reports still allowed before going quiet. */
 	static int reports_left = 10;
 	struct pollrec *free_slot;
 	int slot;
 
 	KASSERT(h != NULL, ("%s: handler is NULL", __func__));
 	KASSERT(ifp != NULL, ("%s: ifp is NULL", __func__));
 
 	mtx_lock(&poll_mtx);
 	if (poll_handlers >= POLL_LIST_LEN) {
 		/*
 		 * The table is full.  That is not expected in normal
 		 * operation and most likely indicates a driver registering
 		 * more than once; complain a limited number of times and
 		 * refuse the registration.
 		 */
 		if (reports_left > 0) {
 			log(LOG_ERR, "poll handlers list full, "
 			    "maybe a broken driver ?\n");
 			reports_left--;
 		}
 		mtx_unlock(&poll_mtx);
 		return (ENOMEM); /* no polling for you */
 	}
 
 	/* Refuse a second registration for the same interface. */
 	for (slot = 0; slot < poll_handlers; slot++) {
 		if (pr[slot].ifp != ifp || pr[slot].handler == NULL)
 			continue;
 		mtx_unlock(&poll_mtx);
 		log(LOG_DEBUG, "ether_poll_register: %s: handler"
 		    " already registered\n", ifp->if_xname);
 		return (EEXIST);
 	}
 
 	/* Append the new record at the first free position. */
 	free_slot = &pr[poll_handlers];
 	free_slot->handler = h;
 	free_slot->ifp = ifp;
 	poll_handlers++;
 	mtx_unlock(&poll_mtx);
 
 	/* Wake the idle poller if it has marked itself as sleeping. */
 	if (idlepoll_sleeping)
 		wakeup(&idlepoll_sleeping);
 	return (0);
 }
 
 /*
  * Remove interface from the polling list. Called from *_ioctl(), too.
  */
 int
 ether_poll_deregister(struct ifnet *ifp)
 {
 	int slot;
 
 	KASSERT(ifp != NULL, ("%s: ifp is NULL", __func__));
 
 	mtx_lock(&poll_mtx);
 
 	/* Locate the record belonging to this interface. */
 	slot = 0;
 	while (slot < poll_handlers && pr[slot].ifp != ifp)
 		slot++;
 	if (slot == poll_handlers) {
 		log(LOG_DEBUG, "ether_poll_deregister: %s: not found!\n",
 		    ifp->if_xname);
 		mtx_unlock(&poll_mtx);
 		return (ENOENT);
 	}
 
 	/*
 	 * Shrink the table; unless the victim was the last record, move
 	 * the last record into the hole so the used part stays contiguous.
 	 */
 	poll_handlers--;
 	if (slot < poll_handlers)
 		pr[slot] = pr[poll_handlers];
 	mtx_unlock(&poll_mtx);
 	return (0);
 }
 
 /*
  * Legacy interface for turning polling on all interfaces at one time.
  */
 static int
 poll_switch(SYSCTL_HANDLER_ARGS)
 {
 	INIT_VNET_NET(curvnet);
 	struct ifnet *ifp;
 	int error;
 	int val = polling;
 
 	/* Report the current setting and fetch the requested one, if any. */
 	error = sysctl_handle_int(oidp, &val, 0, req);
 	if (error || !req->newptr )
 		return (error);
 
 	if (val == polling)
 		return (0);
 
 	/* Only 0 (off) and 1 (on) are accepted. */
 	if (val < 0 || val > 1)
 		return (EINVAL);
 
 	polling = val;
 
 	/*
 	 * Ask every polling-capable interface to set or clear IFCAP_POLLING
 	 * through its own SIOCSIFCAP ioctl handler.  The handler's return
 	 * value is deliberately ignored.
 	 */
 	IFNET_RLOCK();
 	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 		if (ifp->if_capabilities & IFCAP_POLLING) {
 			struct ifreq ifr;
 
 			/*
 			 * Only ifr_reqcap is filled in below; zero the rest
 			 * so the driver never sees uninitialized stack data.
 			 */
 			bzero(&ifr, sizeof(ifr));
 			if (val == 1)
 				ifr.ifr_reqcap =
 				    ifp->if_capenable | IFCAP_POLLING;
 			else
 				ifr.ifr_reqcap =
 				    ifp->if_capenable & ~IFCAP_POLLING;
 			(void) (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifr);
 		}
 	}
 	IFNET_RUNLOCK();
 
 	/* Newline-terminated, like every other log() message in this file. */
 	log(LOG_ERR, "kern.polling.enable is deprecated. Use ifconfig(8)\n");
 
 	return (0);
 }
 
 static void
 poll_idle(void)
 {
 	struct thread *td = curthread;
 	struct rtprio rtp;
 
 	rtp.prio = RTP_PRIO_MAX;	/* lowest priority */
 	rtp.type = RTP_PRIO_IDLE;
 	PROC_SLOCK(td->td_proc);
 	rtp_to_pri(&rtp, td);
 	PROC_SUNLOCK(td->td_proc);
 
 	for (;;) {
 		if (poll_in_idle_loop && poll_handlers > 0) {
 			idlepoll_sleeping = 0;
 			ether_poll(poll_each_burst);
 			thread_lock(td);
 			mi_switch(SW_VOL, NULL);
 			thread_unlock(td);
 		} else {
 			idlepoll_sleeping = 1;
 			tsleep(&idlepoll_sleeping, 0, "pollid", hz * 3);
 		}
 	}
 }
 
 /*
  * Descriptor for the "idlepoll" kernel process: it runs poll_idle() and its
  * struct proc pointer is stored in idlepoll.  The SYSINIT below passes the
  * descriptor to kproc_start() at SI_SUB_KTHREAD_VM.
  */
 static struct proc *idlepoll;
 static struct kproc_desc idlepoll_kp = {
 	 "idlepoll",
 	 poll_idle,
 	 &idlepoll
 };
 SYSINIT(idlepoll, SI_SUB_KTHREAD_VM, SI_ORDER_ANY, kproc_start,
     &idlepoll_kp);
Index: projects/pnet/sys/net/netisr.c
===================================================================
--- projects/pnet/sys/net/netisr.c	(revision 193105)
+++ projects/pnet/sys/net/netisr.c	(revision 193106)
@@ -1,1090 +1,1090 @@
 /*-
  * Copyright (c) 2007-2009 Robert N. M. Watson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
- * netisr2 is a packet dispatch service, allowing synchronous (directly
+ * netisr is a packet dispatch service, allowing synchronous (directly
  * dispatched) and asynchronous (deferred dispatch) processing of packets by
  * registered protocol handlers.  Callers pass a protocol identifier and
- * packet to netisr2, along with a direct dispatch hint, and work will either
+ * packet to netisr, along with a direct dispatch hint, and work will either
  * be immediately processed with the registered handler, or passed to a
  * kernel software interrupt (SWI) thread for deferred dispatch.  Callers
  * will generally select one or the other based on:
  *
  * - Might directly dispatching a netisr handler lead to code reentrance or
  *   lock recursion, such as entering the socket code from the socket code.
  * - Might directly dispatching a netisr handler lead to recursive
  *   processing, such as when decapsulating several wrapped layers of tunnel
  *   information (IPSEC within IPSEC within ...).
  *
  * Maintaining ordering for protocol streams is a critical design concern.
  * Enforcing ordering limits the opportunity for concurrency, but maintains
  * the strong ordering requirements found in some protocols, such as TCP.  Of
  * related concern is CPU affinity--it is desirable to process all data
  * associated with a particular stream on the same CPU over time in order to
  * avoid acquiring locks associated with the connection on different CPUs,
  * keep connection data in one cache, and to generally encourage associated
  * user threads to live on the same CPU as the stream.  It's also desirable
  * to avoid lock migration and contention where locks are associated with
  * more than one flow.
  *
- * netisr2 supports several policy variations, represented by the
+ * netisr supports several policy variations, represented by the
  * NETISR_POLICY_* constants, allowing protocols to play a varying role in
  * identifying flows, assigning work to CPUs, etc.  These are described in
  * detail in netisr.h.
  */
 
 #include "opt_ddb.h"
 #include "opt_device_polling.h"
 
 #include <sys/param.h>
 #include <sys/bus.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
 #include <sys/interrupt.h>
 #include <sys/lock.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/rmlock.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/socket.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/netisr.h>
 
 /*-
  * Synchronize use and modification of the registered netisr data structures;
  * acquire a write lock while modifying the set of registered protocols to
  * prevent partially registered or unregistered protocols from being run.
  *
  * The following data structures and fields are protected by this lock:
  *
  * - The np array, including all fields of struct netisr_proto.
  * - The nws array, including all fields of struct netisr_worker.
  * - The nws_array array.
  *
- * Note: the NETISR2_LOCKING define controls whether read locks are acquired
+ * Note: the NETISR_LOCKING define controls whether read locks are acquired
  * in packet processing paths requiring netisr registration stability.  This
  * is disabled by default as it can lead to a measurable performance
  * degradation even with rmlocks (3%-6% for loopback ping-ping traffic), and
  * because netisr registration and unregistration is extremely rare at
  * runtime.  If it becomes more common, this decision should be revisited.
  *
  * XXXRW: rmlocks don't support assertions.
  */
 static struct rmlock	netisr_rmlock;
 #define	NETISR_LOCK_INIT()	rm_init_flags(&netisr_rmlock, "netisr", \
 				    RM_NOWITNESS)
 #define	NETISR_LOCK_ASSERT()
 #define	NETISR_RLOCK(tracker)	rm_rlock(&netisr_rmlock, (tracker))
 #define	NETISR_RUNLOCK(tracker)	rm_runlock(&netisr_rmlock, (tracker))
 #define	NETISR_WLOCK()		rm_wlock(&netisr_rmlock)
 #define	NETISR_WUNLOCK()	rm_wunlock(&netisr_rmlock)
-/* #define	NETISR2_LOCKING */
+/* #define	NETISR_LOCKING */
 
-SYSCTL_NODE(_net, OID_AUTO, isr2, CTLFLAG_RW, 0, "netisr2");
+SYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW, 0, "netisr");
 
 /*-
  * Three direct dispatch policies are supported:
  *
  * - Always defer: all work is scheduled for a netisr, regardless of context.
  *   (!direct_enable)
  *
  * - Hybrid: if the executing context allows direct dispatch, and we're
  *   running on the CPU the work would be done on, then direct dispatch if it
  *   wouldn't violate ordering constraints on the workstream.
  *   (direct_enable && !direct_force)
  *
  * - Always direct: if the executing context allows direct dispatch, always
  *   direct dispatch.  (direct_enable && direct_force)
  *
  * Notice that changing the global policy could lead to short periods of
  * misordered processing, but this is considered acceptable as compared to
  * the complexity of enforcing ordering during policy changes.
  */
 static int	netisr_direct_force = 1;	/* Always direct dispatch. */
-SYSCTL_INT(_net_isr2, OID_AUTO, direct_force, CTLFLAG_RW,
+SYSCTL_INT(_net_isr, OID_AUTO, direct_force, CTLFLAG_RW,
     &netisr_direct_force, 0, "Force direct dispatch");
 
 static int	netisr_direct_enable = 1;	/* Enable direct dispatch. */
-SYSCTL_INT(_net_isr2, OID_AUTO, direct_enable, CTLFLAG_RW,
+SYSCTL_INT(_net_isr, OID_AUTO, direct_enable, CTLFLAG_RW,
     &netisr_direct_enable, 0, "Enable direct dispatch");
 
 /*
  * Allow the administrator to limit the number of threads (CPUs) to use for
- * netisr2.  We don't check netisr_maxthreads before creating the thread for
+ * netisr.  We don't check netisr_maxthreads before creating the thread for
  * CPU 0, so in practice we ignore values <= 1.  This must be set at boot.
  * We will create at most one thread per CPU.
  */
 static int	netisr_maxthreads = 1;		/* Max number of threads. */
-TUNABLE_INT("net.isr2.maxthreads", &netisr_maxthreads);
-SYSCTL_INT(_net_isr2, OID_AUTO, maxthreads, CTLFLAG_RD,
+TUNABLE_INT("net.isr.maxthreads", &netisr_maxthreads);
+SYSCTL_INT(_net_isr, OID_AUTO, maxthreads, CTLFLAG_RD,
     &netisr_maxthreads, 0,
-    "Use at most this many CPUs for netisr2 processing");
+    "Use at most this many CPUs for netisr processing");
 
 static int	netisr_bindthreads = 0;		/* Bind threads to CPUs. */
-TUNABLE_INT("net.isr2.bindthreads", &netisr_bindthreads);
-SYSCTL_INT(_net_isr2, OID_AUTO, bindthreads, CTLFLAG_RD,
-    &netisr_bindthreads, 0, "Bind netisr2 threads to CPUs.");
+TUNABLE_INT("net.isr.bindthreads", &netisr_bindthreads);
+SYSCTL_INT(_net_isr, OID_AUTO, bindthreads, CTLFLAG_RD,
+    &netisr_bindthreads, 0, "Bind netisr threads to CPUs.");
 
 /*
- * Limit per-workstream queues to at most net.isr2.maxqlimit, both for
- * initial configuration and later modification using netisr2_setqlimit().
+ * Limit per-workstream queues to at most net.isr.maxqlimit, both for initial
+ * configuration and later modification using netisr_setqlimit().
  */
 #define	NETISR_DEFAULT_MAXQLIMIT	10240
 static int	netisr_maxqlimit = NETISR_DEFAULT_MAXQLIMIT;
-SYSCTL_INT(_net_isr2, OID_AUTO, maxqlimit, CTLFLAG_RD,
+SYSCTL_INT(_net_isr, OID_AUTO, maxqlimit, CTLFLAG_RD,
     &netisr_maxqlimit, 0,
-    "Maximum netisr2 per-protocol, per-CPU queue depth.");
+    "Maximum netisr per-protocol, per-CPU queue depth.");
 
 /*
  * Each protocol is described by a struct netisr_proto, which holds all
  * global per-protocol information.  This data structure is set up by
  * netisr_register(), and derived from the public struct netisr_handler.
  */
 struct netisr_proto {
 	const char	*np_name;	/* Character string protocol name. */
-	netisr_t	*np_handler;	/* Protocol handler. */
+	netisr_handler_t *np_handler;	/* Protocol handler. */
 	netisr_m2flow_t	*np_m2flow;	/* Query flow for untagged packet. */
 	netisr_m2cpuid_t *np_m2cpuid;	/* Query CPU to process packet on. */
 	u_int		 np_qlimit;	/* Maximum per-CPU queue depth. */
 	u_int		 np_policy;	/* Work placement policy. */
 };
 
 #define	NETISR_MAXPROT		32		/* Compile-time limit. */
 
 /*
  * The np array describes all registered protocols, indexed by protocol
  * number.
  */
 static struct netisr_proto	np[NETISR_MAXPROT];
 
 /*
  * Protocol-specific work for each workstream is described by struct
  * netisr_work.  Each work descriptor consists of an mbuf queue and
  * statistics.
  */
 struct netisr_work {
 	/*
 	 * Packet queue, linked by m_nextpkt.
 	 */
 	struct mbuf	*nw_head;
 	struct mbuf	*nw_tail;
 	/* Presumably the number of packets currently queued -- TODO confirm. */
 	u_int		 nw_len;
 	/* Presumably the maximum allowed queue depth -- TODO confirm. */
 	u_int		 nw_qlimit;
 	/* Presumably the highest queue depth observed -- TODO confirm. */
 	u_int		 nw_watermark;
 
 	/*
 	 * Statistics -- written unlocked, but mostly from curcpu.
 	 */
 	u_int64_t	 nw_dispatched; /* Number of direct dispatches. */
 	u_int64_t	 nw_hybrid_dispatched; /* "" hybrid dispatches. */
 	u_int64_t	 nw_qdrops;	/* "" drops. */
 	u_int64_t	 nw_queued;	/* "" enqueues. */
 	u_int64_t	 nw_handled;	/* "" handled in worker. */
 };
 
 /*
  * Workstreams hold a set of ordered work across each protocol, and are
  * described by netisr_workstream.  Each workstream is associated with a
  * worker thread, which in turn is pinned to a CPU.  Work associated with a
  * workstream can be processed in other threads during direct dispatch;
  * concurrent processing is prevented by the NWS_RUNNING flag, which
  * indicates that a thread is already processing the work queue.
  */
 struct netisr_workstream {
 	struct intr_event *nws_intr_event;	/* Handler for stream. */
 	void		*nws_swi_cookie;	/* swi(9) cookie for stream. */
 	struct mtx	 nws_mtx;		/* Synchronize work. */
 	u_int		 nws_cpu;		/* CPU pinning. */
 	u_int		 nws_flags;		/* Wakeup flags. */
 	u_int		 nws_pendingbits;	/* Scheduled protocols. */
 
 	/*
 	 * Each protocol has per-workstream data.
 	 */
 	struct netisr_work	nws_work[NETISR_MAXPROT];
 } __aligned(CACHE_LINE_SIZE);
 
 /*
  * Per-CPU workstream data, indexed by CPU ID.
  */
 static struct netisr_workstream		 nws[MAXCPU];
 
 /*
  * Map contiguous values between 0 and nws_count into CPU IDs appropriate for
  * indexing the nws[] array.  This allows constructions of the form
  * nws[nws_array(arbitraryvalue % nws_count)].
  */
 static u_int				 nws_array[MAXCPU];
 
 /*
  * Number of registered workstreams.  Will be at most the number of running
  * CPUs once fully started.
  */
 static u_int				 nws_count;
 
 /*
  * Per-workstream flags.
  */
 #define	NWS_RUNNING	0x00000001	/* Currently running in a thread. */
 #define	NWS_DISPATCHING	0x00000002	/* Currently being direct-dispatched. */
 #define	NWS_SCHEDULED	0x00000004	/* Signal issued. */
 
 /*
  * Synchronization for each workstream: a mutex protects all mutable fields
  * in each stream, including per-protocol state (mbuf queues).  The SWI is
  * woken up if asynchronous dispatch is required.
  */
 #define	NWS_LOCK(s)		mtx_lock(&(s)->nws_mtx)
 #define	NWS_LOCK_ASSERT(s)	mtx_assert(&(s)->nws_mtx, MA_OWNED)
 #define	NWS_UNLOCK(s)		mtx_unlock(&(s)->nws_mtx)
 #define	NWS_SIGNAL(s)		swi_sched((s)->nws_swi_cookie, 0)
 
 /*
  * Utility routines for protocols that implement their own mapping of flows
  * to CPUs.
  */
 /*
  * Return nws_count, the number of registered workstreams.
  */
 u_int
-netisr2_get_cpucount(void)
+netisr_get_cpucount(void)
 {
 
 	return (nws_count);
 }
 
 /*
  * Translate a workstream index in [0, nws_count) into the CPU ID stored
  * for it in nws_array[].
  */
 u_int
-netisr2_get_cpuid(u_int cpunumber)
+netisr_get_cpuid(u_int cpunumber)
 {
 
 	/*
 	 * NOTE(review): the message prints ">" although the assertion also
 	 * fails when cpunumber == nws_count.
 	 */
-	KASSERT(cpunumber < nws_count, ("netisr2_get_cpuid: %u > %u",
-	    cpunumber, nws_count));
+	KASSERT(cpunumber < nws_count, ("%s: %u > %u", __func__, cpunumber,
+	    nws_count));
 
 	return (nws_array[cpunumber]);
 }
 
 /*
  * The default implementation of flow -> CPU ID mapping.
  *
  * Non-static so that protocols can use it to map their own work to specific
- * CPUs in a manner consistent to netisr2 for affinity purposes.
+ * CPUs in a manner consistent to netisr for affinity purposes.
  */
 u_int
-netisr2_default_flow2cpu(u_int flowid)
+netisr_default_flow2cpu(u_int flowid)
 {
 
 	/*
 	 * Reduce the flow ID modulo the number of workstreams and look up
 	 * the CPU ID for that slot.
 	 * NOTE(review): nws_count is the divisor, so this presumably must
 	 * not be called before at least one workstream exists -- confirm.
 	 */
 	return (nws_array[flowid % nws_count]);
 }
 
 /*
  * Register a new netisr handler, which requires initializing per-protocol
- * fields for each workstream.  All netisr2 work is briefly suspended while
+ * fields for each workstream.  All netisr work is briefly suspended while
  * the protocol is installed.
  */
 void
-netisr2_register(const struct netisr_handler *nhp)
+netisr_register(const struct netisr_handler *nhp)
 {
 	struct netisr_work *npwp;
 	const char *name;
 	u_int i, proto;
 
 	proto = nhp->nh_proto;
 	name = nhp->nh_name;
 
 	/*
 	 * Test that the requested registration is valid.
 	 */
 	KASSERT(nhp->nh_name != NULL,
-	    ("netisr2_register: nh_name NULL for %d", proto));
+	    ("%s: nh_name NULL for %d", __func__, proto));
 	KASSERT(nhp->nh_handler != NULL,
-	    ("netisr2_register: nh_handler NULL for %s", name));
+	    ("%s: nh_handler NULL for %s", __func__, name));
 	KASSERT(nhp->nh_policy == NETISR_POLICY_SOURCE ||
 	    nhp->nh_policy == NETISR_POLICY_FLOW ||
 	    nhp->nh_policy == NETISR_POLICY_CPU,
-	    ("netisr2_register: unsupported nh_policy %u for %s",
+	    ("%s: unsupported nh_policy %u for %s", __func__,
 	    nhp->nh_policy, name));
 	KASSERT(nhp->nh_policy == NETISR_POLICY_FLOW ||
 	    nhp->nh_m2flow == NULL,
-	    ("netisr2_register: nh_policy != FLOW but m2flow defined for %s",
+	    ("%s: nh_policy != FLOW but m2flow defined for %s", __func__,
 	    name));
 	KASSERT(nhp->nh_policy == NETISR_POLICY_CPU || nhp->nh_m2cpuid == NULL,
-	    ("netisr2_register: nh_policy != CPU but m2cpuid defined for %s",
+	    ("%s: nh_policy != CPU but m2cpuid defined for %s", __func__,
 	    name));
 	KASSERT(nhp->nh_policy != NETISR_POLICY_CPU || nhp->nh_m2cpuid != NULL,
-	    ("netisr2_register: nh_policy == CPU but m2cpuid not defined for "
-	    "%s", name));
+	    ("%s: nh_policy == CPU but m2cpuid not defined for %s", __func__,
+	    name));
 	KASSERT(nhp->nh_qlimit != 0,
-	    ("netisr2_register: nh_qlimit 0 for %s", name));
+	    ("%s: nh_qlimit 0 for %s", __func__, name));
 	KASSERT(proto < NETISR_MAXPROT,
-	    ("netisr2_register(%d, %s): protocol too big", proto, name));
+	    ("%s(%d, %s): protocol too big", __func__, proto, name));
 
 	/*
 	 * Test that no existing registration exists for this protocol.
 	 */
 	NETISR_WLOCK();
 	KASSERT(np[proto].np_name == NULL,
-	    ("netisr2_register(%d, %s): name present", proto, name));
+	    ("%s(%d, %s): name present", __func__, proto, name));
 	KASSERT(np[proto].np_handler == NULL,
-	    ("netisr2_register(%d, %s): handler present", proto, name));
+	    ("%s(%d, %s): handler present", __func__, proto, name));
 
 	np[proto].np_name = name;
 	np[proto].np_handler = nhp->nh_handler;
 	np[proto].np_m2flow = nhp->nh_m2flow;
 	np[proto].np_m2cpuid = nhp->nh_m2cpuid;
 	if (nhp->nh_qlimit > netisr_maxqlimit) {
-		printf("netisr2_register: %s requested queue limit %u "
-		    "capped to net.isr2.maxqlimit %u\n", name,
-		    nhp->nh_qlimit, netisr_maxqlimit);
+		printf("%s: %s requested queue limit %u capped to "
+		    "net.isr.maxqlimit %u\n", __func__, name, nhp->nh_qlimit,
+		    netisr_maxqlimit);
 		np[proto].np_qlimit = netisr_maxqlimit;
 	} else
 		np[proto].np_qlimit = nhp->nh_qlimit;
 	np[proto].np_policy = nhp->nh_policy;
 	for (i = 0; i < MAXCPU; i++) {
 		npwp = &nws[i].nws_work[proto];
 		bzero(npwp, sizeof(*npwp));
-		npwp->nw_qlimit = nhp->nh_qlimit;
+		npwp->nw_qlimit = np[proto].np_qlimit;
 	}
 	NETISR_WUNLOCK();
 }
 
 /*
  * Clear drop counters across all workstreams for a protocol.
  */
 void
-netisr2_clearqdrops(const struct netisr_handler *nhp)
+netisr_clearqdrops(const struct netisr_handler *nhp)
 {
 	struct netisr_work *npwp;
 #ifdef INVARIANTS
 	const char *name;
 #endif
 	u_int i, proto;
 
 	proto = nhp->nh_proto;
 #ifdef INVARIANTS
 	name = nhp->nh_name;
 #endif
 	KASSERT(proto < NETISR_MAXPROT,
-	    ("netisr_clearqdrops(%d): protocol too big for %s", proto, name));
+	    ("%s(%d): protocol too big for %s", __func__, proto, name));
 
 	NETISR_WLOCK();
 	KASSERT(np[proto].np_handler != NULL,
-	    ("netisr_clearqdrops(%d): protocol not registered for %s", proto,
+	    ("%s(%d): protocol not registered for %s", __func__, proto,
 	    name));
 
 	for (i = 0; i < MAXCPU; i++) {
 		npwp = &nws[i].nws_work[proto];
 		npwp->nw_qdrops = 0;
 	}
 	NETISR_WUNLOCK();
 }
 
 /*
  * Query the current drop counters across all workstreams for a protocol.
  */
 void
-netisr2_getqdrops(const struct netisr_handler *nhp, u_int64_t *qdropp)
+netisr_getqdrops(const struct netisr_handler *nhp, u_int64_t *qdropp)
 {
 	struct netisr_work *npwp;
 	struct rm_priotracker tracker;
 #ifdef INVARIANTS
 	const char *name;
 #endif
 	u_int i, proto;
 
 	*qdropp = 0;
 	proto = nhp->nh_proto;
 #ifdef INVARIANTS
 	name = nhp->nh_name;
 #endif
 	KASSERT(proto < NETISR_MAXPROT,
-	    ("netisr_getqdrops(%d): protocol too big for %s", proto, name));
+	    ("%s(%d): protocol too big for %s", __func__, proto, name));
 
 	NETISR_RLOCK(&tracker);
 	KASSERT(np[proto].np_handler != NULL,
-	    ("netisr_getqdrops(%d): protocol not registered for %s", proto,
+	    ("%s(%d): protocol not registered for %s", __func__, proto,
 	    name));
 
 	for (i = 0; i < MAXCPU; i++) {
 		npwp = &nws[i].nws_work[proto];
 		*qdropp += npwp->nw_qdrops;
 	}
 	NETISR_RUNLOCK(&tracker);
 }
 
 /*
  * Query the current queue limit for per-workstream queues for a protocol.
  */
 void
-netisr2_getqlimit(const struct netisr_handler *nhp, u_int *qlimitp)
+netisr_getqlimit(const struct netisr_handler *nhp, u_int *qlimitp)
 {
 	struct rm_priotracker tracker;
 #ifdef INVARIANTS
 	const char *name;
 #endif
 	u_int proto;
 
 	proto = nhp->nh_proto;
 #ifdef INVARIANTS
 	name = nhp->nh_name;
 #endif
 	KASSERT(proto < NETISR_MAXPROT,
-	    ("netisr_getqlimit(%d): protocol too big for %s", proto, name));
+	    ("%s(%d): protocol too big for %s", __func__, proto, name));
 
 	NETISR_RLOCK(&tracker);
 	KASSERT(np[proto].np_handler != NULL,
-	    ("netisr_getqlimit(%d): protocol not registered for %s", proto,
+	    ("%s(%d): protocol not registered for %s", __func__, proto,
 	    name));
 	*qlimitp = np[proto].np_qlimit;
 	NETISR_RUNLOCK(&tracker);
 }
 
 /*
  * Update the queue limit across per-workstream queues for a protocol.  We
  * simply change the limits, and don't drain overflowed packets as they will
  * (hopefully) take care of themselves shortly.
  */
 int
-netisr2_setqlimit(const struct netisr_handler *nhp, u_int qlimit)
+netisr_setqlimit(const struct netisr_handler *nhp, u_int qlimit)
 {
 	struct netisr_work *npwp;
 #ifdef INVARIANTS
 	const char *name;
 #endif
 	u_int i, proto;
 
 	if (qlimit > netisr_maxqlimit)
 		return (EINVAL);
 
 	proto = nhp->nh_proto;
 #ifdef INVARIANTS
 	name = nhp->nh_name;
 #endif
 	KASSERT(proto < NETISR_MAXPROT,
-	    ("netisr_setqlimit(%d): protocol too big for %s", proto, name));
+	    ("%s(%d): protocol too big for %s", __func__, proto, name));
 
 	NETISR_WLOCK();
 	KASSERT(np[proto].np_handler != NULL,
-	    ("netisr_setqlimit(%d): protocol not registered for %s", proto,
+	    ("%s(%d): protocol not registered for %s", __func__, proto,
 	    name));
 
 	np[proto].np_qlimit = qlimit;
 	for (i = 0; i < MAXCPU; i++) {
 		npwp = &nws[i].nws_work[proto];
 		npwp->nw_qlimit = qlimit;
 	}
 	NETISR_WUNLOCK();
 	return (0);
 }
 
 /*
  * Drain all packets currently held in a particular protocol work queue.
  */
 static void
-netisr2_drain_proto(struct netisr_work *npwp)
+netisr_drain_proto(struct netisr_work *npwp)
 {
 	struct mbuf *m;
 
 	while ((m = npwp->nw_head) != NULL) {
 		npwp->nw_head = m->m_nextpkt;
 		m->m_nextpkt = NULL;
 		if (npwp->nw_head == NULL)
 			npwp->nw_tail = NULL;
 		npwp->nw_len--;
 		m_freem(m);
 	}
-	KASSERT(npwp->nw_tail == NULL, ("netisr_drain_proto: tail"));
-	KASSERT(npwp->nw_len == 0, ("netisr_drain_proto: len"));
+	KASSERT(npwp->nw_tail == NULL, ("%s: tail", __func__));
+	KASSERT(npwp->nw_len == 0, ("%s: len", __func__));
 }
 
 /*
  * Remove the registration of a network protocol, which requires clearing
  * per-protocol fields across all workstreams, including freeing all mbufs in
- * the queues at time of unregister.  All work in netisr2 is briefly
- * suspended while this takes place.
+ * the queues at time of unregister.  All work in netisr is briefly suspended
+ * while this takes place.
  */
 void
-netisr2_unregister(const struct netisr_handler *nhp)
+netisr_unregister(const struct netisr_handler *nhp)
 {
 	struct netisr_work *npwp;
 #ifdef INVARIANTS
 	const char *name;
 #endif
 	u_int i, proto;
 
 	proto = nhp->nh_proto;
 #ifdef INVARIANTS
 	name = nhp->nh_name;
 #endif
 	KASSERT(proto < NETISR_MAXPROT,
-	    ("netisr_unregister(%d): protocol too big for %s", proto, name));
+	    ("%s(%d): protocol too big for %s", __func__, proto, name));
 
 	NETISR_WLOCK();
 	KASSERT(np[proto].np_handler != NULL,
-	    ("netisr_unregister(%d): protocol not registered for %s", proto,
+	    ("%s(%d): protocol not registered for %s", __func__, proto,
 	    name));
 
 	np[proto].np_name = NULL;
 	np[proto].np_handler = NULL;
 	np[proto].np_m2flow = NULL;
 	np[proto].np_m2cpuid = NULL;
 	np[proto].np_qlimit = 0;
 	np[proto].np_policy = 0;
 	for (i = 0; i < MAXCPU; i++) {
 		npwp = &nws[i].nws_work[proto];
-		netisr2_drain_proto(npwp);
+		netisr_drain_proto(npwp);
 		bzero(npwp, sizeof(*npwp));
 	}
 	NETISR_WUNLOCK();
 }
 
 /*
  * Look up the workstream given a packet and source identifier.  Do this by
  * checking the protocol's policy, and optionally call out to the protocol
  * for assistance if required.
  */
 static struct mbuf *
-netisr2_select_cpuid(struct netisr_proto *npp, uintptr_t source,
+netisr_select_cpuid(struct netisr_proto *npp, uintptr_t source,
     struct mbuf *m, u_int *cpuidp)
 {
 	struct ifnet *ifp;
 
 	NETISR_LOCK_ASSERT();
 
 	/*
 	 * In the event we have only one worker, shortcut and deliver to it
 	 * without further ado.
 	 */
 	if (nws_count == 1) {
 		*cpuidp = nws_array[0];
 		return (m);
 	}
 
 	/*
 	 * What happens next depends on the policy selected by the protocol.
 	 * If we want to support per-interface policies, we should do that
 	 * here first.
 	 */
 	switch (npp->np_policy) {
 	case NETISR_POLICY_CPU:
 		return (npp->np_m2cpuid(m, source, cpuidp));
 
 	case NETISR_POLICY_FLOW:
 		if (!(m->m_flags & M_FLOWID) && npp->np_m2flow != NULL) {
 			m = npp->np_m2flow(m, source);
 			if (m == NULL)
 				return (NULL);
 		}
 		if (m->m_flags & M_FLOWID) {
 			*cpuidp =
-			    netisr2_default_flow2cpu(m->m_pkthdr.flowid);
+			    netisr_default_flow2cpu(m->m_pkthdr.flowid);
 			return (m);
 		}
 		/* FALLTHROUGH */
 
 	case NETISR_POLICY_SOURCE:
 		ifp = m->m_pkthdr.rcvif;
 		if (ifp != NULL)
 			*cpuidp = nws_array[(ifp->if_index + source) %
 			    nws_count];
 		else
 			*cpuidp = nws_array[source % nws_count];
 		return (m);
 
 	default:
-		panic("netisr2_select_cpuid: invalid policy %u for %s",
+		panic("%s: invalid policy %u for %s", __func__,
 		    npp->np_policy, npp->np_name);
 	}
 }
 
 /*
  * Process packets associated with a workstream and protocol.  For reasons of
  * fairness, we process up to one complete netisr queue at a time, moving the
  * queue to a stack-local queue for processing, but do not loop refreshing
  * from the global queue.  The caller is responsible for deciding whether to
  * loop, and for setting the NWS_RUNNING flag.  The passed workstream will be
  * locked on entry and relocked before return, but will be released while
  * processing.  The number of packets processed is returned.
  */
 static u_int
-netisr2_process_workstream_proto(struct netisr_workstream *nwsp, u_int proto)
+netisr_process_workstream_proto(struct netisr_workstream *nwsp, u_int proto)
 {
 	struct netisr_work local_npw, *npwp;
 	u_int handled;
 	struct mbuf *m;
 
 	NWS_LOCK_ASSERT(nwsp);
 
 	KASSERT(nwsp->nws_flags & NWS_RUNNING,
-	    ("netisr_process_workstream_proto(%d): not running", proto));
+	    ("%s(%d): not running", __func__, proto));
 	KASSERT(proto >= 0 && proto < NETISR_MAXPROT,
-	    ("netisr_process_workstream_proto(%d): invalid proto\n", proto));
+	    ("%s(%d): invalid proto\n", __func__, proto));
 
 	npwp = &nwsp->nws_work[proto];
 	if (npwp->nw_len == 0)
 		return (0);
 
 	/*
 	 * Move the global work queue to a thread-local work queue.
 	 *
 	 * Notice that this means the effective maximum length of the queue
 	 * is actually twice that of the maximum queue length specified in
 	 * the protocol registration call.
 	 */
 	handled = npwp->nw_len;
 	local_npw = *npwp;
 	npwp->nw_head = NULL;
 	npwp->nw_tail = NULL;
 	npwp->nw_len = 0;
 	nwsp->nws_pendingbits &= ~(1 << proto);
 	NWS_UNLOCK(nwsp);
 	while ((m = local_npw.nw_head) != NULL) {
 		local_npw.nw_head = m->m_nextpkt;
 		m->m_nextpkt = NULL;
 		if (local_npw.nw_head == NULL)
 			local_npw.nw_tail = NULL;
 		local_npw.nw_len--;
 		np[proto].np_handler(m);
 	}
 	KASSERT(local_npw.nw_len == 0,
-	    ("netisr_process_proto(%d): len %d", proto, local_npw.nw_len));
+	    ("%s(%d): len %d", __func__, proto, local_npw.nw_len));
 	NWS_LOCK(nwsp);
 	npwp->nw_handled += handled;
 	return (handled);
 }
 
 /*
- * SWI handler for netisr2 -- processes prackets in a set of workstreams that
+ * SWI handler for netisr -- processes packets in a set of workstreams that
  * it owns, woken up by calls to NWS_SIGNAL().  If this workstream is already
  * being direct dispatched, go back to sleep and wait for the dispatching
  * thread to wake us up again.
  */
 static void
 swi_net(void *arg)
 {
-#ifdef NETISR2_LOCKING
+#ifdef NETISR_LOCKING
 	struct rm_priotracker tracker;
 #endif
 	struct netisr_workstream *nwsp;
 	u_int bits, prot;
 
 	nwsp = arg;
 
 #ifdef DEVICE_POLLING
 	KASSERT(nws_count == 1,
-	    ("swi_net: device_polling but nws_count != 1"));
+	    ("%s: device_polling but nws_count != 1", __func__));
 	netisr_poll();
 #endif
-#ifdef NETISR2_LOCKING
+#ifdef NETISR_LOCKING
 	NETISR_RLOCK(&tracker);
 #endif
 	NWS_LOCK(nwsp);
-	KASSERT(!(nwsp->nws_flags & NWS_RUNNING), ("swi_net: running"));
+	KASSERT(!(nwsp->nws_flags & NWS_RUNNING), ("%s: running", __func__));
 	if (nwsp->nws_flags & NWS_DISPATCHING)
 		goto out;
 	nwsp->nws_flags |= NWS_RUNNING;
 	nwsp->nws_flags &= ~NWS_SCHEDULED;
-	while ((bits = nws->nws_pendingbits) != 0) {
+	while ((bits = nwsp->nws_pendingbits) != 0) {
 		while ((prot = ffs(bits)) != 0) {
 			prot--;
 			bits &= ~(1 << prot);
-			(void)netisr2_process_workstream_proto(nwsp, prot);
+			(void)netisr_process_workstream_proto(nwsp, prot);
 		}
 	}
 	nwsp->nws_flags &= ~NWS_RUNNING;
 out:
 	NWS_UNLOCK(nwsp);
-#ifdef NETISR2_LOCKING
+#ifdef NETISR_LOCKING
 	NETISR_RUNLOCK(&tracker);
 #endif
 #ifdef DEVICE_POLLING
 	netisr_pollmore();
 #endif
 }
 
 static int
-netisr2_queue_workstream(struct netisr_workstream *nwsp, u_int proto,
+netisr_queue_workstream(struct netisr_workstream *nwsp, u_int proto,
     struct netisr_work *npwp, struct mbuf *m, int *dosignalp)
 {
 
 	NWS_LOCK_ASSERT(nwsp);
 
 	*dosignalp = 0;
 	if (npwp->nw_len < npwp->nw_qlimit) {
 		m->m_nextpkt = NULL;
 		if (npwp->nw_head == NULL) {
 			npwp->nw_head = m;
 			npwp->nw_tail = m;
 		} else {
 			npwp->nw_tail->m_nextpkt = m;
 			npwp->nw_tail = m;
 		}
 		npwp->nw_len++;
 		if (npwp->nw_len > npwp->nw_watermark)
 			npwp->nw_watermark = npwp->nw_len;
 		nwsp->nws_pendingbits |= (1 << proto);
 		if (!(nwsp->nws_flags & (NWS_SCHEDULED | NWS_RUNNING))) {
 			nwsp->nws_flags |= NWS_SCHEDULED;
 			*dosignalp = 1;	/* Defer until unlocked. */
 		}
 		npwp->nw_queued++;
 		return (0);
 	} else {
 		npwp->nw_qdrops++;
 		return (ENOBUFS);
 	}
 }
 
 static int
-netisr2_queue_internal(u_int proto, struct mbuf *m, u_int cpuid)
+netisr_queue_internal(u_int proto, struct mbuf *m, u_int cpuid)
 {
 	struct netisr_workstream *nwsp;
 	struct netisr_work *npwp;
 	int dosignal, error;
 
-#ifdef NETISR2_LOCKING
+#ifdef NETISR_LOCKING
 	NETISR_LOCK_ASSERT();
 #endif
-	KASSERT(cpuid < MAXCPU, ("netisr2_queue_internal: cpuid too big "
-	    "(%u, %u)", cpuid, MAXCPU));
+	KASSERT(cpuid < MAXCPU, ("%s: cpuid too big (%u, %u)", __func__,
+	    cpuid, MAXCPU));
 
 	dosignal = 0;
 	error = 0;
 	nwsp = &nws[cpuid];
 	npwp = &nwsp->nws_work[proto];
 	NWS_LOCK(nwsp);
-	error = netisr2_queue_workstream(nwsp, proto, npwp, m, &dosignal);
+	error = netisr_queue_workstream(nwsp, proto, npwp, m, &dosignal);
 	NWS_UNLOCK(nwsp);
 	if (dosignal)
 		NWS_SIGNAL(nwsp);
 	return (error);
 }
 
 int
-netisr2_queue_src(u_int proto, uintptr_t source, struct mbuf *m)
+netisr_queue_src(u_int proto, uintptr_t source, struct mbuf *m)
 {
-#ifdef NETISR2_LOCKING
+#ifdef NETISR_LOCKING
 	struct rm_priotracker tracker;
 #endif
 	u_int cpuid, error;
 
 	KASSERT(proto < NETISR_MAXPROT,
-	    ("netisr2_queue_src: invalid proto %d", proto));
+	    ("%s: invalid proto %d", __func__, proto));
 
-#ifdef NETISR2_LOCKING
+#ifdef NETISR_LOCKING
 	NETISR_RLOCK(&tracker);
 #endif
 	KASSERT(np[proto].np_handler != NULL,
-	    ("netisr2_queue_src: invalid proto %d", proto));
+	    ("%s: invalid proto %d", __func__, proto));
 
-	m = netisr2_select_cpuid(&np[proto], source, m, &cpuid);
+	m = netisr_select_cpuid(&np[proto], source, m, &cpuid);
 	if (m != NULL)
-		error = netisr2_queue_internal(proto, m, cpuid);
+		error = netisr_queue_internal(proto, m, cpuid);
 	else
 		error = ENOBUFS;
-#ifdef NETISR2_LOCKING
+#ifdef NETISR_LOCKING
 	NETISR_RUNLOCK(&tracker);
 #endif
 	return (error);
 }
 
 int
 netisr_queue(u_int proto, struct mbuf *m)
 {
 
-	return (netisr2_queue_src(proto, 0, m));
+	return (netisr_queue_src(proto, 0, m));
 }
 
 /*
- * Dispatch a packet for netisr2 processing, direct dispatch permitted by
+ * Dispatch a packet for netisr processing, direct dispatch permitted by
  * calling context.
  */
 int
-netisr2_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m)
+netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m)
 {
-#ifdef NETISR2_LOCKING
+#ifdef NETISR_LOCKING
 	struct rm_priotracker tracker;
 #endif
 	struct netisr_workstream *nwsp;
 	struct netisr_work *npwp;
 	int dosignal, error;
 	u_int cpuid;
 
 	/*
 	 * If direct dispatch is entirely disabled, fall back on queueing.
 	 */
 	if (!netisr_direct_enable)
-		return (netisr2_queue_src(proto, source, m));
+		return (netisr_queue_src(proto, source, m));
 
 	KASSERT(proto < NETISR_MAXPROT,
-	    ("netisr2_dispatch_src: invalid proto %u", proto));
-#ifdef NETISR2_LOCKING
+	    ("%s: invalid proto %u", __func__, proto));
+#ifdef NETISR_LOCKING
 	NETISR_RLOCK(&tracker);
 #endif
 	KASSERT(np[proto].np_handler != NULL,
-	    ("netisr2_dispatch_src: invalid proto %u", proto));
+	    ("%s: invalid proto %u", __func__, proto));
 
 	/*
 	 * If direct dispatch is forced, then unconditionally dispatch
 	 * without a formal CPU selection.  Borrow the current CPU's stats,
 	 * even if there's no worker on it.
 	 */
 	if (netisr_direct_force) {
 		nwsp = &nws[curcpu];
 		npwp = &nwsp->nws_work[proto];
 		npwp->nw_dispatched++;
 		npwp->nw_handled++;
 		np[proto].np_handler(m);
 		error = 0;
 		goto out_unlock;
 	}
 
 	/*
 	 * Otherwise, we execute in a hybrid mode where we will try to direct
 	 * dispatch if we're on the right CPU and the netisr worker isn't
 	 * already running.
 	 */
-	m = netisr2_select_cpuid(&np[proto], source, m, &cpuid);
+	m = netisr_select_cpuid(&np[proto], source, m, &cpuid);
 	if (m == NULL) {
 		error = ENOBUFS;
 		goto out_unlock;
 	}
 	sched_pin();
 	if (cpuid != curcpu)
 		goto queue_fallback;
 	nwsp = &nws[cpuid];
 	npwp = &nwsp->nws_work[proto];
 
 	/*-
 	 * We are willing to direct dispatch only if three conditions hold:
 	 *
 	 * (1) The netisr worker isn't already running,
 	 * (2) Another thread isn't already directly dispatching, and
 	 * (3) The netisr hasn't already been woken up.
 	 */
 	NWS_LOCK(nwsp);
 	if (nwsp->nws_flags & (NWS_RUNNING | NWS_DISPATCHING | NWS_SCHEDULED)) {
-		error = netisr2_queue_workstream(nwsp, proto, npwp, m,
+		error = netisr_queue_workstream(nwsp, proto, npwp, m,
 		    &dosignal);
-		NWS_UNLOCK(nws);
+		NWS_UNLOCK(nwsp);
 		if (dosignal)
 			NWS_SIGNAL(nwsp);
 		goto out_unpin;
 	}
 
 	/*
 	 * The current thread is now effectively the netisr worker, so set
 	 * the dispatching flag to prevent concurrent processing of the
 	 * stream from another thread (even the netisr worker), which could
 	 * otherwise lead to effective misordering of the stream.
 	 */
 	nwsp->nws_flags |= NWS_DISPATCHING;
 	NWS_UNLOCK(nwsp);
 	np[proto].np_handler(m);
 	NWS_LOCK(nwsp);
 	nwsp->nws_flags &= ~NWS_DISPATCHING;
 	npwp->nw_handled++;
 	npwp->nw_hybrid_dispatched++;
 
 	/*
 	 * If other work was enqueued by another thread while we were direct
 	 * dispatching, we need to signal the netisr worker to do that work.
 	 * In the future, we might want to do some of that work in the
 	 * current thread, rather than trigger further context switches.  If
 	 * so, we'll want to establish a reasonable bound on the work done in
 	 * the "borrowed" context.
 	 */
 	if (nwsp->nws_pendingbits != 0) {
 		nwsp->nws_flags |= NWS_SCHEDULED;
 		dosignal = 1;
 	} else
 		dosignal = 0;
 	NWS_UNLOCK(nwsp);
 	if (dosignal)
 		NWS_SIGNAL(nwsp);
 	error = 0;
 	goto out_unpin;
 
 queue_fallback:
-	error = netisr2_queue_internal(proto, m, cpuid);
+	error = netisr_queue_internal(proto, m, cpuid);
 out_unpin:
 	sched_unpin();
 out_unlock:
-#ifdef NETISR2_LOCKING
+#ifdef NETISR_LOCKING
 	NETISR_RUNLOCK(&tracker);
 #endif
 	return (error);
 }
 
 int
 netisr_dispatch(u_int proto, struct mbuf *m)
 {
 
-	return (netisr2_dispatch_src(proto, 0, m));
+	return (netisr_dispatch_src(proto, 0, m));
 }
 
 #ifdef DEVICE_POLLING
 /*
- * Kernel polling borrows a netisr2 thread to run interface polling in; this
- * function allows kernel polling to request that the netisr2 thread be
+ * Kernel polling borrows a netisr thread to run interface polling in; this
+ * function allows kernel polling to request that the netisr thread be
  * scheduled even if no packets are pending for protocols.
  */
 void
-netisr2_sched_poll(void)
+netisr_sched_poll(void)
 {
 	struct netisr_workstream *nwsp;
 
 	nwsp = &nws[nws_array[0]];
 	NWS_SIGNAL(nwsp);
 }
 #endif
 
 static void
-netisr2_start_swi(u_int cpuid, struct pcpu *pc)
+netisr_start_swi(u_int cpuid, struct pcpu *pc)
 {
 	char swiname[12];
 	struct netisr_workstream *nwsp;
 	int error;
 
 	nwsp = &nws[cpuid];
-	mtx_init(&nwsp->nws_mtx, "netisr2_mtx", NULL, MTX_DEF);
+	mtx_init(&nwsp->nws_mtx, "netisr_mtx", NULL, MTX_DEF);
 	nwsp->nws_cpu = cpuid;
 	snprintf(swiname, sizeof(swiname), "netisr %d", cpuid);
 	error = swi_add(&nwsp->nws_intr_event, swiname, swi_net, nwsp,
 	    SWI_NET, INTR_MPSAFE, &nwsp->nws_swi_cookie);
 	if (error)
-		panic("netisr2_init: swi_add %d", error);
-	pc->pc_netisr2 = nwsp->nws_intr_event;
+		panic("%s: swi_add %d", __func__, error);
+	pc->pc_netisr = nwsp->nws_intr_event;
 	if (netisr_bindthreads) {
 		error = intr_event_bind(nwsp->nws_intr_event, cpuid);
 		if (error != 0)
-			printf("netisr2_start_swi cpu %d: intr_event_bind: %d",
+			printf("%s: cpu %d: intr_event_bind: %d\n", __func__,
 			    cpuid, error);
 	}
 	NETISR_WLOCK();
 	nws_array[nws_count] = nwsp->nws_cpu;
 	nws_count++;
 	NETISR_WUNLOCK();
 }
 
 /*
  * Initialize the netisr subsystem.  We rely on BSS and static initialization
  * of most fields in global data structures.
  *
  * Start a worker thread for the boot CPU so that we can support network
- * traffic immediately in case the netowrk stack is used before additional
+ * traffic immediately in case the network stack is used before additional
  * CPUs are started (for example, diskless boot).
  */
 static void
-netisr2_init(void *arg)
+netisr_init(void *arg)
 {
 
-	KASSERT(curcpu == 0, ("netisr2_init: not on CPU 0"));
+	KASSERT(curcpu == 0, ("%s: not on CPU 0", __func__));
 
 	NETISR_LOCK_INIT();
 	if (netisr_maxthreads < 1)
 		netisr_maxthreads = 1;
 	if (netisr_maxthreads > MAXCPU)
 		netisr_maxthreads = MAXCPU;
 #ifdef DEVICE_POLLING
 	/*
 	 * The device polling code is not yet aware of how to deal with
 	 * multiple netisr threads, so for the time being compiling in device
 	 * polling disables parallel netisr workers.
 	 */
 	netisr_maxthreads = 1;
 	netisr_bindthreads = 0;
 #endif
 
-	netisr2_start_swi(curcpu, pcpu_find(curcpu));
+	netisr_start_swi(curcpu, pcpu_find(curcpu));
 }
-SYSINIT(netisr2_init, SI_SUB_SOFTINTR, SI_ORDER_FIRST, netisr2_init, NULL);
+SYSINIT(netisr_init, SI_SUB_SOFTINTR, SI_ORDER_FIRST, netisr_init, NULL);
 
 /*
  * Start worker threads for additional CPUs.  No attempt to gracefully handle
  * work reassignment, we don't yet support dynamic reconfiguration.
  */
 static void
-netisr2_start(void *arg)
+netisr_start(void *arg)
 {
 	struct pcpu *pc;
 
 	SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
 		if (nws_count >= netisr_maxthreads)
 			break;
 		/* XXXRW: Is skipping absent CPUs still required here? */
 		if (CPU_ABSENT(pc->pc_cpuid))
 			continue;
 		/* Worker will already be present for boot CPU. */
-		if (pc->pc_netisr2 != NULL)
+		if (pc->pc_netisr != NULL)
 			continue;
-		netisr2_start_swi(pc->pc_cpuid, pc);
+		netisr_start_swi(pc->pc_cpuid, pc);
 	}
 }
-SYSINIT(netisr2_start, SI_SUB_SMP, SI_ORDER_MIDDLE, netisr2_start, NULL);
+SYSINIT(netisr_start, SI_SUB_SMP, SI_ORDER_MIDDLE, netisr_start, NULL);
 
 #ifdef DDB
-DB_SHOW_COMMAND(netisr2, db_show_netisr2)
+DB_SHOW_COMMAND(netisr, db_show_netisr)
 {
 	struct netisr_workstream *nwsp;
 	struct netisr_work *nwp;
 	int cpu, first, proto;
 
 	db_printf("%3s %6s %5s %5s %5s %8s %8s %8s %8s\n", "CPU", "Proto",
 	    "Len", "WMark", "Max", "Disp", "HDisp", "Drop", "Queue");
 	for (cpu = 0; cpu < MAXCPU; cpu++) {
 		nwsp = &nws[cpu];
 		if (nwsp->nws_intr_event == NULL)
 			continue;
 		first = 1;
 		for (proto = 0; proto < NETISR_MAXPROT; proto++) {
 			if (np[proto].np_handler == NULL)
 				continue;
 			nwp = &nwsp->nws_work[proto];
 			if (first) {
 				db_printf("%3d ", cpu);
 				first = 0;
 			} else
 				db_printf("%3s ", "");
 			db_printf(
 			    "%6s %5d %5d %5d %8ju %8ju %8ju %8ju\n",
 			    np[proto].np_name, nwp->nw_len,
 			    nwp->nw_watermark, nwp->nw_qlimit,
 			    nwp->nw_dispatched, nwp->nw_hybrid_dispatched,
 			    nwp->nw_qdrops, nwp->nw_queued);
 		}
 	}
 }
 #endif
Index: projects/pnet/sys/net/netisr.h
===================================================================
--- projects/pnet/sys/net/netisr.h	(revision 193105)
+++ projects/pnet/sys/net/netisr.h	(revision 193106)
@@ -1,157 +1,155 @@
 /*-
  * Copyright (c) 2007-2009 Robert N. M. Watson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #ifndef _NET_NETISR_H_
 #define _NET_NETISR_H_
 
 #ifndef _KERNEL
 #error "no user-serviceable parts inside"
 #endif
 
 /*
  * The netisr (network interrupt service routine) provides a deferred
- * execution evironment in which (generally inbound) network processing can
+ * execution environment in which (generally inbound) network processing can
  * take place.  Protocols register handlers which will be executed directly,
  * or via deferred dispatch, depending on the circumstances.
  *
  * Historically, this was implemented by the BSD software ISR facility; it is
  * now implemented via a software ithread (SWI).
  */
 #define	NETISR_POLL	0		/* polling callback, must be first */
 #define	NETISR_IP	2		/* same as AF_INET */
 #define	NETISR_IGMP	3		/* IGMPv3 output queue */
 #define	NETISR_ROUTE	14		/* routing socket */
 #define	NETISR_AARP	15		/* Appletalk ARP */
 #define	NETISR_ATALK2	16		/* Appletalk phase 2 */
 #define	NETISR_ATALK1	17		/* Appletalk phase 1 */
 #define	NETISR_ARP	18		/* same as AF_LINK */
 #define	NETISR_IPX	23		/* same as AF_IPX */
 #define	NETISR_ETHER	24		/* ethernet input */
 #define	NETISR_IPV6	27
 #define	NETISR_NATM	28
 #define	NETISR_POLLMORE	31		/* polling callback, must be last */
 
 /*-
  * Protocols express ordering constraints and affinity preferences by
  * implementing one or neither of nh_m2flow and nh_m2cpuid, which are used by
- * netisr2 to determine which per-CPU workstream to assign mbufs to.
+ * netisr to determine which per-CPU workstream to assign mbufs to.
  *
  * The following policies may be used by protocols:
  *
- * NETISR_POLICY_SOURCE - netisr2 should maintain source ordering without
- *                        advice from the protocol.  netisr2 will ignore any
+ * NETISR_POLICY_SOURCE - netisr should maintain source ordering without
+ *                        advice from the protocol.  netisr will ignore any
  *                        flow IDs present on the mbuf for the purposes of
  *                        work placement.
  *
- * NETISR_POLICY_FLOW - netisr2 should maintain flow ordering as defined by
+ * NETISR_POLICY_FLOW - netisr should maintain flow ordering as defined by
  *                      the mbuf header flow ID field.  If the protocol
- *                      implements nh_m2flow, then netisr2 will query the
+ *                      implements nh_m2flow, then netisr will query the
  *                      protocol in the event that the mbuf doesn't have a
  *                      flow ID, falling back on source ordering.
  *
- * NETISR_POLICY_CPU - netisr2 will delegate all work placement decisions to
+ * NETISR_POLICY_CPU - netisr will delegate all work placement decisions to
  *                     the protocol, querying nh_m2cpuid for each packet.
  *
  * Protocols might make decisions about work placement based on an existing
  * calculated flow ID on the mbuf, such as one provided in hardware, the
  * receive interface pointed to by the mbuf (if any), the optional source
  * identifier passed at some dispatch points, or even parse packet headers to
  * calculate a flow.  Both protocol handlers may return a new mbuf pointer
  * for the chain, or NULL if the packet proves invalid or m_pullup() fails.
  *
  * XXXRW: If we eventually support dynamic reconfiguration, there should be
  * protocol handlers to notify them of CPU configuration changes so that they
  * can rebalance work.
  */
 struct mbuf;
-typedef void		 netisr_t (struct mbuf *m);
+typedef void		 netisr_handler_t (struct mbuf *m);
 typedef struct mbuf	*netisr_m2cpuid_t(struct mbuf *m, uintptr_t source,
 			 u_int *cpuid);
 typedef	struct mbuf	*netisr_m2flow_t(struct mbuf *m, uintptr_t source);
 
 #define	NETISR_POLICY_SOURCE	1	/* Maintain source ordering. */
 #define	NETISR_POLICY_FLOW	2	/* Maintain flow ordering. */
 #define	NETISR_POLICY_CPU	3	/* Protocol determines CPU placement. */
 
 /*
  * Data structure describing a protocol handler.
  */
 struct netisr_handler {
 	const char	*nh_name;	/* Character string protocol name. */
-	netisr_t	*nh_handler;	/* Protocol handler. */
+	netisr_handler_t *nh_handler;	/* Protocol handler. */
 	netisr_m2flow_t	*nh_m2flow;	/* Query flow for untagged packet. */
 	netisr_m2cpuid_t *nh_m2cpuid;	/* Query CPU to process mbuf on. */
 	u_int		 nh_proto;	/* Integer protocol ID. */
 	u_int		 nh_qlimit;	/* Maximum per-CPU queue depth. */
 	u_int		 nh_policy;	/* Work placement policy. */
 	u_int		 nh_ispare[5];	/* For future use. */
 	void		*nh_pspare[4];	/* For future use. */
 };
 
 /*
- * Register, unregister, and other netisr2 handler management functions.
+ * Register, unregister, and other netisr handler management functions.
  */
-void	netisr2_clearqdrops(const struct netisr_handler *nhp);
-void	netisr2_getqdrops(const struct netisr_handler *nhp,
+void	netisr_clearqdrops(const struct netisr_handler *nhp);
+void	netisr_getqdrops(const struct netisr_handler *nhp,
 	    u_int64_t *qdropsp);
-void	netisr2_getqlimit(const struct netisr_handler *nhp, u_int *qlimitp);
-void	netisr2_register(const struct netisr_handler *nhp);
-int	netisr2_setqlimit(const struct netisr_handler *nhp, u_int qlimit);
-void	netisr2_unregister(const struct netisr_handler *nhp);
+void	netisr_getqlimit(const struct netisr_handler *nhp, u_int *qlimitp);
+void	netisr_register(const struct netisr_handler *nhp);
+int	netisr_setqlimit(const struct netisr_handler *nhp, u_int qlimit);
+void	netisr_unregister(const struct netisr_handler *nhp);
 
 /*
  * Process a packet destined for a protocol, and attempt direct dispatch.
  * Supplemental source ordering information can be passed using the _src
  * variant.
  */
 int	netisr_dispatch(u_int proto, struct mbuf *m);
+int	netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m);
 int	netisr_queue(u_int proto, struct mbuf *m);
-int	netisr2_dispatch(u_int proto, struct mbuf *m);
-int	netisr2_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m);
-int	netisr2_queue(u_int proto, struct mbuf *m);
-int	netisr2_queue_src(u_int proto, uintptr_t source, struct mbuf *m);
+int	netisr_queue_src(u_int proto, uintptr_t source, struct mbuf *m);
 
 /*
- * Provide a default implementation of "map a ID to a cpu ID".
+ * Provide a default implementation of "map a flow ID to a CPU ID".
  */
-u_int	netisr2_default_flow2cpu(u_int flowid);
+u_int	netisr_default_flow2cpu(u_int flowid);
 
 /*
- * Utility routines to return the number of CPUs participting in netisr2, and
+ * Utility routines to return the number of CPUs participating in netisr, and
  * to return a mapping from a number to a CPU ID that can be used with the
  * scheduler.
  */
-u_int	netisr2_get_cpucount(void);
-u_int	netisr2_get_cpuid(u_int cpunumber);
+u_int	netisr_get_cpucount(void);
+u_int	netisr_get_cpuid(u_int cpunumber);
 
 /*
- * Interfaces between DEVICE_POLLING and netisr2.
+ * Interfaces between DEVICE_POLLING and netisr.
  */
-void	netisr2_sched_poll(void);
+void	netisr_sched_poll(void);
 void	netisr_poll(void);
 void	netisr_pollmore(void);
 
 #endif /* !_NET_NETISR_H_ */
Index: projects/pnet/sys/net/rtsock.c
===================================================================
--- projects/pnet/sys/net/rtsock.c	(revision 193105)
+++ projects/pnet/sys/net/rtsock.c	(revision 193106)
@@ -1,1522 +1,1522 @@
 /*-
  * Copyright (c) 1988, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)rtsock.c	8.7 (Berkeley) 10/12/95
  * $FreeBSD$
  */
 #include "opt_sctp.h"
 #include "opt_mpath.h"
 #include "opt_route.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/domain.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/rwlock.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 #include <sys/vimage.h>
 
 #include <net/if.h>
 #include <net/if_dl.h>
 #include <net/if_llatbl.h>
 #include <net/netisr.h>
 #include <net/raw_cb.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #ifdef INET6
 #include <netinet6/scope6_var.h>
 #endif
 
 #ifdef SCTP
 extern void sctp_addr_change(struct ifaddr *ifa, int cmd);
 #endif /* SCTP */
 
 MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");
 
 /* NB: these are not modified */
 static struct	sockaddr route_src = { 2, PF_ROUTE, };
 static struct	sockaddr sa_zero   = { sizeof(sa_zero), AF_INET, };
 
 static struct {
 	int	ip_count;	/* attached w/ AF_INET */
 	int	ip6_count;	/* attached w/ AF_INET6 */
 	int	ipx_count;	/* attached w/ AF_IPX */
 	int	any_count;	/* total attached */
 } route_cb;
 
 struct mtx rtsock_mtx;
 MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF);
 
 #define	RTSOCK_LOCK()	mtx_lock(&rtsock_mtx)
 #define	RTSOCK_UNLOCK()	mtx_unlock(&rtsock_mtx)
 #define	RTSOCK_LOCK_ASSERT()	mtx_assert(&rtsock_mtx, MA_OWNED)
 
 SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RD, 0, "");
 
 struct walkarg {
 	int	w_tmemsize;
 	int	w_op, w_arg;
 	caddr_t	w_tmem;
 	struct sysctl_req *w_req;
 };
 
 static void	rts_input(struct mbuf *m);
 static struct mbuf *rt_msg1(int type, struct rt_addrinfo *rtinfo);
 static int	rt_msg2(int type, struct rt_addrinfo *rtinfo,
 			caddr_t cp, struct walkarg *w);
 static int	rt_xaddrs(caddr_t cp, caddr_t cplim,
 			struct rt_addrinfo *rtinfo);
 static int	sysctl_dumpentry(struct radix_node *rn, void *vw);
 static int	sysctl_iflist(int af, struct walkarg *w);
 static int	sysctl_ifmalist(int af, struct walkarg *w);
 static int	route_output(struct mbuf *m, struct socket *so);
 static void	rt_setmetrics(u_long which, const struct rt_metrics *in,
 			struct rt_metrics_lite *out);
 static void	rt_getmetrics(const struct rt_metrics_lite *in,
 			struct rt_metrics *out);
 static void	rt_dispatch(struct mbuf *, const struct sockaddr *);
 
 /*
  * netisr registration record for routing-socket messages: mbufs queued
  * under NETISR_ROUTE are delivered to rts_input().  nh_qlimit is only the
  * compiled-in default; rts_init() may override it from a tunable before
  * the handler is registered.
  */
 static struct netisr_handler rtsock_nh = {
 	.nh_name = "rtsock",
 	.nh_handler = rts_input,
 	.nh_proto = NETISR_ROUTE,
 	.nh_qlimit = 256,
 	.nh_policy = NETISR_POLICY_SOURCE,
 };
 
 /*
  * Sysctl handler for net.route.netisr_maxqlen: reports the current queue
  * limit of the rtsock netisr handler and, when a new value is supplied,
  * validates it (must be >= 1) and installs it.  Returns 0 or an errno.
  */
 static int
 sysctl_route_netisr_maxqlen(SYSCTL_HANDLER_ARGS)
 {
 	int error, qlimit;
 
-	netisr2_getqlimit(&rtsock_nh, &qlimit);
+	netisr_getqlimit(&rtsock_nh, &qlimit);
 	error = sysctl_handle_int(oidp, &qlimit, 0, req);
 	/* Read-only request (no new value) or copy failure: nothing to set. */
         if (error || !req->newptr)
                 return (error);
 	if (qlimit < 1)
 		return (EINVAL);
-	return (netisr2_setqlimit(&rtsock_nh, qlimit));
+	return (netisr_setqlimit(&rtsock_nh, qlimit));
 }
 SYSCTL_PROC(_net_route, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW,
     0, 0, sysctl_route_netisr_maxqlen, "I",
     "maximum routing socket dispatch queue length");
 
 /*
  * Boot-time initialisation (run via the SYSINIT below): optionally take the
  * queue limit from the net.route.netisr_maxqlen tunable, then register the
  * routing-socket netisr handler.
  * NOTE(review): the tunable value is stored without the ">= 1" check that
  * the sysctl handler applies -- confirm whether netisr_register() validates it.
  */
 static void
 rts_init(void)
 {
 	int tmp;
 
 	if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp))
 		rtsock_nh.nh_qlimit = tmp;
-	netisr2_register(&rtsock_nh);
+	netisr_register(&rtsock_nh);
 }
 SYSINIT(rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rts_init, 0);
 
 static void
 rts_input(struct mbuf *m)
 {
 	struct sockproto route_proto;
 	unsigned short *family;
 	struct m_tag *tag;
 
 	route_proto.sp_family = PF_ROUTE;
 	tag = m_tag_find(m, PACKET_TAG_RTSOCKFAM, NULL);
 	if (tag != NULL) {
 		family = (unsigned short *)(tag + 1);
 		route_proto.sp_protocol = *family;
 		m_tag_delete(m, tag);
 	} else
 		route_proto.sp_protocol = 0;
 
 	raw_input(m, &route_proto, &route_src);
 }
 
 /*
  * It really doesn't make any sense at all for this code to share much
  * with raw_usrreq.c, since its functionality is so restricted.  XXX
  */
 /* pru_abort: forwards unchanged to the generic raw-socket handler. */
 static void
 rts_abort(struct socket *so)
 {
 
 	raw_usrreqs.pru_abort(so);
 }
 
 /* pru_close: forwards unchanged to the generic raw-socket handler. */
 static void
 rts_close(struct socket *so)
 {
 
 	raw_usrreqs.pru_close(so);
 }
 
 /* pru_accept is EOPNOTSUPP */
 
 /*
  * pru_attach: create the raw control block for a new routing socket, bind
  * the socket to the creating process's FIB, attach it through raw_attach()
  * and account for it in route_cb.
  *
  * Returns 0 on success or the error from raw_attach(), in which case the
  * control block is released and so->so_pcb is reset to NULL.
  */
 static int
 rts_attach(struct socket *so, int proto, struct thread *td)
 {
 	struct rawcb *rp;
 	int s, error;
 
 	KASSERT(so->so_pcb == NULL, ("rts_attach: so_pcb != NULL"));
 
 	/*
 	 * M_WAITOK allocations sleep until memory is available and do not
 	 * return NULL, so there is no failure case to handle here.
 	 */
 	rp = malloc(sizeof *rp, M_PCB, M_WAITOK | M_ZERO);
 
 	/*
 	 * The splnet() is necessary to block protocols from sending
 	 * error notifications (like RTM_REDIRECT or RTM_LOSING) while
 	 * this PCB is extant but incompletely initialized.
 	 * Probably we should try to do more of this work beforehand and
 	 * eliminate the spl.
 	 */
 	s = splnet();
 	so->so_pcb = (caddr_t)rp;
 	so->so_fibnum = td->td_proc->p_fibnum;
 	error = raw_attach(so, proto);
 	rp = sotorawcb(so);
 	if (error) {
 		splx(s);
 		so->so_pcb = NULL;
 		free(rp, M_PCB);
 		return error;
 	}
 	/* Count the new listener, by protocol family and in total. */
 	RTSOCK_LOCK();
 	switch(rp->rcb_proto.sp_protocol) {
 	case AF_INET:
 		route_cb.ip_count++;
 		break;
 	case AF_INET6:
 		route_cb.ip6_count++;
 		break;
 	case AF_IPX:
 		route_cb.ipx_count++;
 		break;
 	}
 	route_cb.any_count++;
 	RTSOCK_UNLOCK();
 	soisconnected(so);
 	/* By default the sender also receives a copy of its own messages. */
 	so->so_options |= SO_USELOOPBACK;
 	splx(s);
 	return 0;
 }
 
 /* pru_bind: forwards unchanged to the generic raw-socket handler. */
 static int
 rts_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 
 	return (raw_usrreqs.pru_bind(so, nam, td)); /* xxx just EINVAL */
 }
 
 /* pru_connect: forwards unchanged to the generic raw-socket handler. */
 static int
 rts_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 
 	return (raw_usrreqs.pru_connect(so, nam, td)); /* XXX just EINVAL */
 }
 
 /* pru_connect2 is EOPNOTSUPP */
 /* pru_control is EOPNOTSUPP */
 
 /*
  * pru_detach: undo the route_cb accounting done at attach time, then let
  * the generic raw-socket code tear down the control block.
  */
 static void
 rts_detach(struct socket *so)
 {
 	struct rawcb *rcb;
 	int af;
 
 	rcb = sotorawcb(so);
 	KASSERT(rcb != NULL, ("rts_detach: rp == NULL"));
 
 	RTSOCK_LOCK();
 	af = rcb->rcb_proto.sp_protocol;
 	if (af == AF_INET)
 		route_cb.ip_count--;
 	else if (af == AF_INET6)
 		route_cb.ip6_count--;
 	else if (af == AF_IPX)
 		route_cb.ipx_count--;
 	route_cb.any_count--;
 	RTSOCK_UNLOCK();
 
 	raw_usrreqs.pru_detach(so);
 }
 
 /* pru_disconnect: forwards unchanged to the generic raw-socket handler. */
 static int
 rts_disconnect(struct socket *so)
 {
 
 	return (raw_usrreqs.pru_disconnect(so));
 }
 
 /* pru_listen is EOPNOTSUPP */
 
 /* pru_peeraddr: forwards unchanged to the generic raw-socket handler. */
 static int
 rts_peeraddr(struct socket *so, struct sockaddr **nam)
 {
 
 	return (raw_usrreqs.pru_peeraddr(so, nam));
 }
 
 /* pru_rcvd is EOPNOTSUPP */
 /* pru_rcvoob is EOPNOTSUPP */
 
 /* pru_send: forwards all arguments unchanged to the generic raw-socket handler. */
 static int
 rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
 	 struct mbuf *control, struct thread *td)
 {
 
 	return (raw_usrreqs.pru_send(so, flags, m, nam, control, td));
 }
 
 /* pru_sense is null */
 
 /* pru_shutdown: forwards unchanged to the generic raw-socket handler. */
 static int
 rts_shutdown(struct socket *so)
 {
 
 	return (raw_usrreqs.pru_shutdown(so));
 }
 
 /* pru_sockaddr: forwards unchanged to the generic raw-socket handler. */
 static int
 rts_sockaddr(struct socket *so, struct sockaddr **nam)
 {
 
 	return (raw_usrreqs.pru_sockaddr(so, nam));
 }
 
 /*
  * User-request switch for routing sockets.  Every entry points at one of
  * the rts_* functions defined in this file; requests not listed here are
  * left at whatever default the pr_usrreqs machinery supplies.
  */
 static struct pr_usrreqs route_usrreqs = {
 	.pru_abort =		rts_abort,
 	.pru_attach =		rts_attach,
 	.pru_bind =		rts_bind,
 	.pru_connect =		rts_connect,
 	.pru_detach =		rts_detach,
 	.pru_disconnect =	rts_disconnect,
 	.pru_peeraddr =		rts_peeraddr,
 	.pru_send =		rts_send,
 	.pru_shutdown =		rts_shutdown,
 	.pru_sockaddr =		rts_sockaddr,
 	.pru_close =		rts_close,
 };
 
 /* Guarded so that the union is declared only once per translation unit. */
 #ifndef _SOCKADDR_UNION_DEFINED
 #define	_SOCKADDR_UNION_DEFINED
 /*
  * The union of all possible address formats we handle.
  */
 union sockaddr_union {
 	struct sockaddr		sa;	/* generic sockaddr view */
 	struct sockaddr_in	sin;	/* IPv4 view */
 	struct sockaddr_in6	sin6;	/* IPv6 view */
 };
 #endif /* _SOCKADDR_UNION_DEFINED */
 
 /*
  * Choose the interface address (RTAX_IFA) to report for a route lookup made
  * with credentials 'cred'.
  *
  * If prison_if() accepts the route's own interface address, that address is
  * used as is.  Otherwise, depending on the destination's address family, the
  * addresses of 'ifp' are searched for one accepted by prison_check_ip4() /
  * prison_check_ip6(); if none is found, prison_get_ip4() / prison_get_ip6()
  * supplies a fallback.  In those cases the result is built in '*saun' and
  * info->rti_info[RTAX_IFA] is pointed at it, so 'saun' must stay valid for
  * as long as 'info' is used.
  *
  * Returns 0 on success, or ESRCH when no address can be determined or the
  * destination family is not handled.
  */
 static int
 rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
     struct rtentry *rt, union sockaddr_union *saun, struct ucred *cred)
 {
 
 	/* First, see if the returned address is part of the jail. */
 	if (prison_if(cred, rt->rt_ifa->ifa_addr) == 0) {
 		info->rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
 		return (0);
 	}
 
 	switch (info->rti_info[RTAX_DST]->sa_family) {
 #ifdef INET
 	case AF_INET:
 	{
 		struct in_addr ia;
 		struct ifaddr *ifa;
 		int found;
 
 		found = 0;
 		/*
 		 * Try to find an address on the given outgoing interface
 		 * that belongs to the jail.
 		 */
 		IF_ADDR_LOCK(ifp);
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			struct sockaddr *sa;
 			sa = ifa->ifa_addr;
 			if (sa->sa_family != AF_INET)
 				continue;
 			/* Copy by value so 'ia' stays usable after unlock. */
 			ia = ((struct sockaddr_in *)sa)->sin_addr;
 			if (prison_check_ip4(cred, &ia) == 0) {
 				found = 1;
 				break;
 			}
 		}
 		IF_ADDR_UNLOCK(ifp);
 		if (!found) {
 			/*
 			 * As a last resort return the 'default' jail address.
 			 */
 			ia = ((struct sockaddr_in *)rt->rt_ifa->ifa_addr)->
 			    sin_addr;
 			if (prison_get_ip4(cred, &ia) != 0)
 				return (ESRCH);
 		}
 		/* Build a fresh sockaddr_in around the chosen address. */
 		bzero(&saun->sin, sizeof(struct sockaddr_in));
 		saun->sin.sin_len = sizeof(struct sockaddr_in);
 		saun->sin.sin_family = AF_INET;
 		saun->sin.sin_addr.s_addr = ia.s_addr;
 		info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin;
 		break;
 	}
 #endif
 #ifdef INET6
 	case AF_INET6:
 	{
 		struct in6_addr ia6;
 		struct ifaddr *ifa;
 		int found;
 
 		found = 0;
 		/*
 		 * Try to find an address on the given outgoing interface
 		 * that belongs to the jail.
 		 */
 		IF_ADDR_LOCK(ifp);
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			struct sockaddr *sa;
 			sa = ifa->ifa_addr;
 			if (sa->sa_family != AF_INET6)
 				continue;
 			/* Copy by value so 'ia6' stays usable after unlock. */
 			bcopy(&((struct sockaddr_in6 *)sa)->sin6_addr,
 			    &ia6, sizeof(struct in6_addr));
 			if (prison_check_ip6(cred, &ia6) == 0) {
 				found = 1;
 				break;
 			}
 		}
 		IF_ADDR_UNLOCK(ifp);
 		if (!found) {
 			/*
 			 * As a last resort return the 'default' jail address.
 			 */
 			ia6 = ((struct sockaddr_in6 *)rt->rt_ifa->ifa_addr)->
 			    sin6_addr;
 			if (prison_get_ip6(cred, &ia6) != 0)
 				return (ESRCH);
 		}
 		/* Build a fresh sockaddr_in6 around the chosen address. */
 		bzero(&saun->sin6, sizeof(struct sockaddr_in6));
 		saun->sin6.sin6_len = sizeof(struct sockaddr_in6);
 		saun->sin6.sin6_family = AF_INET6;
 		bcopy(&ia6, &saun->sin6.sin6_addr, sizeof(struct in6_addr));
 		/* Failure to recover the scope is treated as "no address". */
 		if (sa6_recoverscope(&saun->sin6) != 0)
 			return (ESRCH);
 		info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin6;
 		break;
 	}
 #endif
 	default:
 		return (ESRCH);
 	}
 	return (0);
 }
 
 /*
  * Process one request message written to a routing socket.
  *
  * The message in 'm' is validated (length, version, address list), copied
  * into a private contiguous buffer, and the requested RTM_* operation is
  * carried out against the routing table of the socket's FIB.  The buffer
  * is then marked with the outcome (rtm_errno or RTF_DONE), copied back over
  * the mbuf chain and handed to rt_dispatch() as the reply, unless the
  * sender disabled SO_USELOOPBACK and is the only listener.
  *
  * Returns 0 on success or an errno value; 'm' is always consumed, except
  * that a NULL 'm' simply yields ENOBUFS.
  */
 /*ARGSUSED*/
 static int
 route_output(struct mbuf *m, struct socket *so)
 {
 #define	sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0)
 	INIT_VNET_NET(so->so_vnet);
 	struct rt_msghdr *rtm = NULL;
 	struct rtentry *rt = NULL;
 	struct radix_node_head *rnh;
 	struct rt_addrinfo info;
 	int len, error = 0;
 	struct ifnet *ifp = NULL;
 	union sockaddr_union saun;
 
 	/* Record the error and jump to the common reply/cleanup code. */
 #define senderr(e) { error = e; goto flush;}
 	if (m == NULL || ((m->m_len < sizeof(long)) &&
 		       (m = m_pullup(m, sizeof(long))) == NULL))
 		return (ENOBUFS);
 	if ((m->m_flags & M_PKTHDR) == 0)
 		panic("route_output");
 	len = m->m_pkthdr.len;
 	/*
 	 * The packet must hold at least a full header, and the length the
 	 * header claims must match the packet length.  'info' has not been
 	 * zeroed yet, so RTAX_DST is cleared by hand before each early
 	 * senderr(): the flush path passes it to rt_dispatch().
 	 */
 	if (len < sizeof(*rtm) ||
 	    len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
 		info.rti_info[RTAX_DST] = NULL;
 		senderr(EINVAL);
 	}
 	/* Work on a contiguous private copy of the message. */
 	R_Malloc(rtm, struct rt_msghdr *, len);
 	if (rtm == NULL) {
 		info.rti_info[RTAX_DST] = NULL;
 		senderr(ENOBUFS);
 	}
 	m_copydata(m, 0, len, (caddr_t)rtm);
 	if (rtm->rtm_version != RTM_VERSION) {
 		info.rti_info[RTAX_DST] = NULL;
 		senderr(EPROTONOSUPPORT);
 	}
 	rtm->rtm_pid = curproc->p_pid;
 	bzero(&info, sizeof(info));
 	info.rti_addrs = rtm->rtm_addrs;
 	/* Point info.rti_info[] at the sockaddrs that follow the header. */
 	if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info)) {
 		info.rti_info[RTAX_DST] = NULL;
 		senderr(EINVAL);
 	}
 	info.rti_flags = rtm->rtm_flags;
 	/* A destination is mandatory; families must be within range. */
 	if (info.rti_info[RTAX_DST] == NULL ||
 	    info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
 	    (info.rti_info[RTAX_GATEWAY] != NULL &&
 	     info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX))
 		senderr(EINVAL);
 	/*
 	 * Verify that the caller has the appropriate privilege; RTM_GET
 	 * is the only operation the non-superuser is allowed.
 	 */
 	if (rtm->rtm_type != RTM_GET) {
 		error = priv_check(curthread, PRIV_NET_ROUTE);
 		if (error)
 			senderr(error);
 	}
 
 	switch (rtm->rtm_type) {
 		struct rtentry *saved_nrt;
 
 	case RTM_ADD:
 		if (info.rti_info[RTAX_GATEWAY] == NULL)
 			senderr(EINVAL);
 		saved_nrt = NULL;
 
 		/* support for new ARP code */
 		if (info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK &&
 		    (rtm->rtm_flags & RTF_LLDATA) != 0) {
 			error = lla_rt_output(rtm, &info);
 			break;
 		}
 		error = rtrequest1_fib(RTM_ADD, &info, &saved_nrt,
 		    so->so_fibnum);
 		if (error == 0 && saved_nrt) {
 			/* Apply requested metrics and report the interface. */
 			RT_LOCK(saved_nrt);
 			rt_setmetrics(rtm->rtm_inits,
 				&rtm->rtm_rmx, &saved_nrt->rt_rmx);
 			rtm->rtm_index = saved_nrt->rt_ifp->if_index;
 			RT_REMREF(saved_nrt);
 			RT_UNLOCK(saved_nrt);
 		}
 		break;
 
 	case RTM_DELETE:
 		saved_nrt = NULL;
 		/* support for new ARP code */
 		if (info.rti_info[RTAX_GATEWAY] && 
 		    (info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK) &&
 		    (rtm->rtm_flags & RTF_LLDATA) != 0) {
 			error = lla_rt_output(rtm, &info);
 			break;
 		}
 		error = rtrequest1_fib(RTM_DELETE, &info, &saved_nrt,
 		    so->so_fibnum);
 		if (error == 0) {
 			/*
 			 * Describe the removed entry in the reply by reusing
 			 * the RTM_GET reporting code; 'rt' is released at
 			 * flush.
 			 */
 			RT_LOCK(saved_nrt);
 			rt = saved_nrt;
 			goto report;
 		}
 		break;
 
 	case RTM_GET:
 	case RTM_CHANGE:
 	case RTM_LOCK:
 		rnh = V_rt_tables[so->so_fibnum][info.rti_info[RTAX_DST]->sa_family];
 		if (rnh == NULL)
 			senderr(EAFNOSUPPORT);
 		RADIX_NODE_HEAD_RLOCK(rnh);
 		rt = (struct rtentry *) rnh->rnh_lookup(info.rti_info[RTAX_DST],
 			info.rti_info[RTAX_NETMASK], rnh);
 		if (rt == NULL) {	/* XXX looks bogus */
 			RADIX_NODE_HEAD_RUNLOCK(rnh);
 			senderr(ESRCH);
 		}
 #ifdef RADIX_MPATH
 		/*
 		 * for RTM_CHANGE/LOCK, if we got multipath routes,
 		 * we require users to specify a matching RTAX_GATEWAY.
 		 *
 		 * for RTM_GET, gate is optional even with multipath.
 		 * if gate == NULL the first match is returned.
 		 * (no need to call rt_mpath_matchgate if gate == NULL)
 		 */
 		if (rn_mpath_capable(rnh) &&
 		    (rtm->rtm_type != RTM_GET || info.rti_info[RTAX_GATEWAY])) {
 			rt = rt_mpath_matchgate(rt, info.rti_info[RTAX_GATEWAY]);
 			if (!rt) {
 				RADIX_NODE_HEAD_RUNLOCK(rnh);
 				senderr(ESRCH);
 			}
 		}
 #endif
 		/* Lock and reference the entry before dropping the head lock. */
 		RT_LOCK(rt);
 		RT_ADDREF(rt);
 		RADIX_NODE_HEAD_RUNLOCK(rnh);
 
 		/* 
 		 * Fix for PR: 82974
 		 *
 		 * RTM_CHANGE/LOCK need a perfect match, rn_lookup()
 		 * returns a perfect match in case a netmask is
 		 * specified.  For host routes only a longest prefix
 		 * match is returned so it is necessary to compare the
 		 * existence of the netmask.  If both have a netmask
 		 * rnh_lookup() did a perfect match and if none of them
 		 * have a netmask both are host routes which is also a
 		 * perfect match.
 		 */
 
 		if (rtm->rtm_type != RTM_GET && 
 		    (!rt_mask(rt) != !info.rti_info[RTAX_NETMASK])) {
 			RT_UNLOCK(rt);
 			senderr(ESRCH);
 		}
 
 		switch(rtm->rtm_type) {
 
 		case RTM_GET:
 		report:
 			/* Also entered from RTM_DELETE with 'rt' locked. */
 			RT_LOCK_ASSERT(rt);
 			/*
 			 * Jail visibility check: for non-host routes the
 			 * request fails if the caller is jailed; for host
 			 * routes prison_if() decides on the route key.
 			 */
 			if ((rt->rt_flags & RTF_HOST) == 0
 			    ? jailed(curthread->td_ucred)
 			    : prison_if(curthread->td_ucred,
 			    rt_key(rt)) != 0) {
 				RT_UNLOCK(rt);
 				senderr(ESRCH);
 			}
 			info.rti_info[RTAX_DST] = rt_key(rt);
 			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
 			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
 			info.rti_info[RTAX_GENMASK] = 0;
 			/* Interface addresses are reported only on request. */
 			if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
 				ifp = rt->rt_ifp;
 				if (ifp) {
 					info.rti_info[RTAX_IFP] =
 					    ifp->if_addr->ifa_addr;
 					error = rtm_get_jailed(&info, ifp, rt,
 					    &saun, curthread->td_ucred);
 					if (error != 0) {
 						RT_UNLOCK(rt);
 						senderr(error);
 					}
 					if (ifp->if_flags & IFF_POINTOPOINT)
 						info.rti_info[RTAX_BRD] =
 						    rt->rt_ifa->ifa_dstaddr;
 					rtm->rtm_index = ifp->if_index;
 				} else {
 					info.rti_info[RTAX_IFP] = NULL;
 					info.rti_info[RTAX_IFA] = NULL;
 				}
 			} else if ((ifp = rt->rt_ifp) != NULL) {
 				rtm->rtm_index = ifp->if_index;
 			}
 			/* Size the reply first; grow the buffer if needed. */
 			len = rt_msg2(rtm->rtm_type, &info, NULL, NULL);
 			if (len > rtm->rtm_msglen) {
 				struct rt_msghdr *new_rtm;
 				R_Malloc(new_rtm, struct rt_msghdr *, len);
 				if (new_rtm == NULL) {
 					RT_UNLOCK(rt);
 					senderr(ENOBUFS);
 				}
 				bcopy(rtm, new_rtm, rtm->rtm_msglen);
 				Free(rtm); rtm = new_rtm;
 			}
 			/* Second call actually writes header and sockaddrs. */
 			(void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, NULL);
 			rtm->rtm_flags = rt->rt_flags;
 			rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
 			rtm->rtm_addrs = info.rti_addrs;
 			break;
 
 		case RTM_CHANGE:
 			/*
 			 * New gateway could require new ifaddr, ifp;
 			 * flags may also be different; ifp may be specified
 			 * by ll sockaddr when protocol address is ambiguous
 			 */
 			if (((rt->rt_flags & RTF_GATEWAY) &&
 			     info.rti_info[RTAX_GATEWAY] != NULL) ||
 			    info.rti_info[RTAX_IFP] != NULL ||
 			    (info.rti_info[RTAX_IFA] != NULL &&
 			     !sa_equal(info.rti_info[RTAX_IFA],
 				       rt->rt_ifa->ifa_addr))) {
 				/*
 				 * The route lock is dropped before the head
 				 * lock is taken and re-acquired afterwards.
 				 */
 				RT_UNLOCK(rt);
 				RADIX_NODE_HEAD_LOCK(rnh);
 				error = rt_getifa_fib(&info, rt->rt_fibnum);
 				RADIX_NODE_HEAD_UNLOCK(rnh);
 				if (error != 0)
 					senderr(error);
 				RT_LOCK(rt);
 			}
 			/* Notify and release the old ifaddr if it is changing. */
 			if (info.rti_ifa != NULL &&
 			    info.rti_ifa != rt->rt_ifa &&
 			    rt->rt_ifa != NULL &&
 			    rt->rt_ifa->ifa_rtrequest != NULL) {
 				rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt,
 				    &info);
 				IFAFREE(rt->rt_ifa);
 			}
 			if (info.rti_info[RTAX_GATEWAY] != NULL) {
 				/* Same lock order: head lock, then route lock. */
 				RT_UNLOCK(rt);
 				RADIX_NODE_HEAD_LOCK(rnh);
 				RT_LOCK(rt);
 				
 				error = rt_setgate(rt, rt_key(rt),
 				    info.rti_info[RTAX_GATEWAY]);
 				RADIX_NODE_HEAD_UNLOCK(rnh);
 				if (error != 0) {
 					RT_UNLOCK(rt);
 					senderr(error);
 				}
 				rt->rt_flags |= RTF_GATEWAY;
 			}
 			/* Install the new ifaddr/ifp, taking a reference. */
 			if (info.rti_ifa != NULL &&
 			    info.rti_ifa != rt->rt_ifa) {
 				IFAREF(info.rti_ifa);
 				rt->rt_ifa = info.rti_ifa;
 				rt->rt_ifp = info.rti_ifp;
 			}
 			/* Allow some flags to be toggled on change. */
 			rt->rt_flags = (rt->rt_flags & ~RTF_FMASK) |
 				    (rtm->rtm_flags & RTF_FMASK);
 			rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
 					&rt->rt_rmx);
 			rtm->rtm_index = rt->rt_ifp->if_index;
 			if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
 			       rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &info);
 			/* FALLTHROUGH */
 		case RTM_LOCK:
 			/* We don't support locks anymore */
 			break;
 		}
 		RT_UNLOCK(rt);
 		break;
 
 	default:
 		senderr(EOPNOTSUPP);
 	}
 
 flush:
 	/* Stamp the outcome into the reply header. */
 	if (rtm) {
 		if (error)
 			rtm->rtm_errno = error;
 		else
 			rtm->rtm_flags |= RTF_DONE;
 	}
 	if (rt)		/* XXX can this be true? */
 		RTFREE(rt);
     {
 	struct rawcb *rp = NULL;
 	/*
 	 * Check to see if we don't want our own messages.
 	 */
 	if ((so->so_options & SO_USELOOPBACK) == 0) {
 		if (route_cb.any_count <= 1) {
 			/* Sender is the only listener: nothing to deliver. */
 			if (rtm)
 				Free(rtm);
 			m_freem(m);
 			return (error);
 		}
 		/* There is another listener, so construct message */
 		rp = sotorawcb(so);
 	}
 	if (rtm) {
 		/*
 		 * Write the reply back over the request chain.  If the chain
 		 * ended up shorter than the reply, drop it; if it is longer,
 		 * trim the excess (the m_adj() length is negative here).
 		 */
 		m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
 		if (m->m_pkthdr.len < rtm->rtm_msglen) {
 			m_freem(m);
 			m = NULL;
 		} else if (m->m_pkthdr.len > rtm->rtm_msglen)
 			m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
 		Free(rtm);
 	}
 	if (m) {
 		if (rp) {
 			/*
 			 * XXX ensure we don't get a copy by
 			 * invalidating our protocol
 			 */
 			unsigned short family = rp->rcb_proto.sp_family;
 			rp->rcb_proto.sp_family = 0;
 			rt_dispatch(m, info.rti_info[RTAX_DST]);
 			rp->rcb_proto.sp_family = family;
 		} else
 			rt_dispatch(m, info.rti_info[RTAX_DST]);
 	}
     }
 	return (error);
 #undef	sa_equal
 }
 
 /*
  * Copy the metrics selected by the 'which' bitmask from a userland
  * rt_metrics into the kernel's rt_metrics_lite.
  */
 static void
 rt_setmetrics(u_long which, const struct rt_metrics *in,
 	struct rt_metrics_lite *out)
 {
 
 	/*
 	 * Only these are stored in the routing entry since introduction
 	 * of tcp hostcache. The rest is ignored.
 	 */
 	if ((which & RTV_MTU) != 0)
 		out->rmx_mtu = in->rmx_mtu;
 	if ((which & RTV_WEIGHT) != 0)
 		out->rmx_weight = in->rmx_weight;
 
 	/* Userland -> kernel timebase conversion. */
 	if ((which & RTV_EXPIRE) != 0) {
 		if (in->rmx_expire)
 			out->rmx_expire =
 			    in->rmx_expire - time_second + time_uptime;
 		else
 			out->rmx_expire = 0;
 	}
 }
 
 /*
  * Export kernel route metrics to the userland rt_metrics layout.  Fields
  * not set below are left zeroed.
  */
 static void
 rt_getmetrics(const struct rt_metrics_lite *in, struct rt_metrics *out)
 {
 
 	bzero(out, sizeof(*out));
 	out->rmx_mtu = in->rmx_mtu;
 	out->rmx_weight = in->rmx_weight;
 
 	/* Kernel -> userland timebase conversion. */
 	if (in->rmx_expire)
 		out->rmx_expire = in->rmx_expire - time_uptime + time_second;
 	else
 		out->rmx_expire = 0;
 }
 
 /*
  * Extract the addresses of the passed sockaddrs.
  * Do a little sanity checking so as to avoid bad memory references.
  * This data is derived straight from userland.
  */
 /*
  * Walk the sockaddrs packed in [cp, cplim) and record a pointer to each in
  * rtinfo->rti_info[], using the rti_addrs bitmask to decide which slots are
  * present.  Returns 0 on success or EINVAL if a sockaddr would run past
  * cplim.
  */
 static int
 rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
 {
 	struct sockaddr *cur;
 	int idx;
 
 	idx = 0;
 	while (idx < RTAX_MAX && cp < cplim) {
 		if ((rtinfo->rti_addrs & (1 << idx)) != 0) {
 			cur = (struct sockaddr *)cp;
 
 			/* It won't fit. */
 			if (cp + cur->sa_len > cplim)
 				return (EINVAL);
 
 			/*
 			 * A zero-length sockaddr means there are no more;
 			 * any remaining bits are in error.  route(1) can
 			 * evidently generate these, and following them used
 			 * to crash the kernel, so for compatibility point
 			 * the slot at a safe address and stop.
 			 */
 			if (cur->sa_len == 0) {
 				rtinfo->rti_info[idx] = &sa_zero;
 				return (0); /* should be EINVAL but for compat */
 			}
 
 			/* accept it */
 			rtinfo->rti_info[idx] = cur;
 			cp += SA_SIZE(cur);
 		}
 		idx++;
 	}
 	return (0);
 }
 
 static struct mbuf *
 rt_msg1(int type, struct rt_addrinfo *rtinfo)
 {
 	struct rt_msghdr *rtm;
 	struct mbuf *m;
 	int i;
 	struct sockaddr *sa;
 	int len, dlen;
 
 	switch (type) {
 
 	case RTM_DELADDR:
 	case RTM_NEWADDR:
 		len = sizeof(struct ifa_msghdr);
 		break;
 
 	case RTM_DELMADDR:
 	case RTM_NEWMADDR:
 		len = sizeof(struct ifma_msghdr);
 		break;
 
 	case RTM_IFINFO:
 		len = sizeof(struct if_msghdr);
 		break;
 
 	case RTM_IFANNOUNCE:
 	case RTM_IEEE80211:
 		len = sizeof(struct if_announcemsghdr);
 		break;
 
 	default:
 		len = sizeof(struct rt_msghdr);
 	}
 	if (len > MCLBYTES)
 		panic("rt_msg1");
 	m = m_gethdr(M_DONTWAIT, MT_DATA);
 	if (m && len > MHLEN) {
 		MCLGET(m, M_DONTWAIT);
 		if ((m->m_flags & M_EXT) == 0) {
 			m_free(m);
 			m = NULL;
 		}
 	}
 	if (m == NULL)
 		return (m);
 	m->m_pkthdr.len = m->m_len = len;
 	m->m_pkthdr.rcvif = NULL;
 	rtm = mtod(m, struct rt_msghdr *);
 	bzero((caddr_t)rtm, len);
 	for (i = 0; i < RTAX_MAX; i++) {
 		if ((sa = rtinfo->rti_info[i]) == NULL)
 			continue;
 		rtinfo->rti_addrs |= (1 << i);
 		dlen = SA_SIZE(sa);
 		m_copyback(m, len, dlen, (caddr_t)sa);
 		len += dlen;
 	}
 	if (m->m_pkthdr.len != len) {
 		m_freem(m);
 		return (NULL);
 	}
 	rtm->rtm_msglen = len;
 	rtm->rtm_version = RTM_VERSION;
 	rtm->rtm_type = type;
 	return (m);
 }
 
 /*
  * Size, and optionally build, a routing message of 'type' in flat memory.
  *
  * - cp == NULL and w == NULL: nothing is written; only the length is
  *   computed.
  * - cp != NULL: the sockaddrs in rtinfo->rti_info[] are copied after a
  *   header-sized gap at cp, and the header's version, type and length are
  *   filled in.
  * - cp == NULL, w != NULL and w->w_req set: w->w_tmem is grown (M_NOWAIT)
  *   if it is smaller than the message, and when a buffer is available the
  *   message is built there in a second pass.
  *
  * rtinfo->rti_addrs is rebuilt to reflect the non-NULL slots.  Returns the
  * ALIGN()ed total message length.
  */
 static int
 rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w)
 {
 	int i;
 	int len, dlen, second_time = 0;
 	caddr_t cp0;
 
 	rtinfo->rti_addrs = 0;
 again:
 	/* The fixed header size depends on the message type. */
 	switch (type) {
 
 	case RTM_DELADDR:
 	case RTM_NEWADDR:
 		len = sizeof(struct ifa_msghdr);
 		break;
 
 	case RTM_IFINFO:
 		len = sizeof(struct if_msghdr);
 		break;
 
 	case RTM_NEWMADDR:
 		len = sizeof(struct ifma_msghdr);
 		break;
 
 	default:
 		len = sizeof(struct rt_msghdr);
 	}
 	/* Remember the start of the buffer; cp advances past the header. */
 	cp0 = cp;
 	if (cp0)
 		cp += len;
 	for (i = 0; i < RTAX_MAX; i++) {
 		struct sockaddr *sa;
 
 		if ((sa = rtinfo->rti_info[i]) == NULL)
 			continue;
 		rtinfo->rti_addrs |= (1 << i);
 		dlen = SA_SIZE(sa);
 		if (cp) {
 			bcopy((caddr_t)sa, cp, (unsigned)dlen);
 			cp += dlen;
 		}
 		len += dlen;
 	}
 	len = ALIGN(len);
 	/*
 	 * Sizing pass for a sysctl walk: make sure the walker's scratch
 	 * buffer can hold the message, then redo the pass writing into it.
 	 * If the allocation fails, only the length is returned.
 	 */
 	if (cp == NULL && w != NULL && !second_time) {
 		struct walkarg *rw = w;
 
 		if (rw->w_req) {
 			if (rw->w_tmemsize < len) {
 				if (rw->w_tmem)
 					free(rw->w_tmem, M_RTABLE);
 				rw->w_tmem = (caddr_t)
 					malloc(len, M_RTABLE, M_NOWAIT);
 				if (rw->w_tmem)
 					rw->w_tmemsize = len;
 			}
 			if (rw->w_tmem) {
 				cp = rw->w_tmem;
 				second_time = 1;
 				goto again;
 			}
 		}
 	}
 	/* A buffer was written: fill in the common header fields. */
 	if (cp) {
 		struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
 
 		rtm->rtm_version = RTM_VERSION;
 		rtm->rtm_type = type;
 		rtm->rtm_msglen = len;
 	}
 	return (len);
 }
 
 /*
  * This routine is called to generate a message from the routing
  * socket indicating that a redirect has occurred, a routing lookup
  * has failed, or that a protocol has detected timeouts to a particular
  * destination.
  */
 void
 rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
 {
 	struct rt_msghdr *rtm;
 	struct mbuf *m;
 	struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
 
 	if (route_cb.any_count == 0)
 		return;
 	m = rt_msg1(type, rtinfo);
 	if (m == NULL)
 		return;
 	rtm = mtod(m, struct rt_msghdr *);
 	rtm->rtm_flags = RTF_DONE | flags;
 	rtm->rtm_errno = error;
 	rtm->rtm_addrs = rtinfo->rti_addrs;
 	rt_dispatch(m, sa);
 }
 
 /*
  * This routine is called to generate a message from the routing
  * socket indicating that the status of a network interface has changed.
  */
 void
 rt_ifmsg(struct ifnet *ifp)
 {
 	struct if_msghdr *ifm;
 	struct mbuf *m;
 	struct rt_addrinfo info;
 
 	if (route_cb.any_count == 0)
 		return;
 	bzero((caddr_t)&info, sizeof(info));
 	m = rt_msg1(RTM_IFINFO, &info);
 	if (m == NULL)
 		return;
 	ifm = mtod(m, struct if_msghdr *);
 	ifm->ifm_index = ifp->if_index;
 	ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
 	ifm->ifm_data = ifp->if_data;
 	ifm->ifm_addrs = 0;
 	rt_dispatch(m, NULL);
 }
 
 /*
  * This is called to generate messages from the routing socket
  * indicating a network interface has had addresses associated with it.
  * if we ever reverse the logic and replace messages TO the routing
  * socket indicate a request to configure interfaces, then it will
  * be unnecessary as the routing socket will automatically generate
  * copies of it.
  */
 /*
  * Two messages are generated, in an order that depends on 'cmd':
  *   RTM_ADD:    pass 1 sends RTM_NEWADDR, pass 2 sends the route message;
  *   RTM_DELETE: pass 1 sends the route message, pass 2 sends RTM_DELADDR.
  * The route message is skipped when 'rt' is NULL, and either message is
  * skipped if its mbuf cannot be allocated.
  */
 void
 rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
 {
 	struct rt_addrinfo info;
 	struct sockaddr *sa = NULL;
 	int pass;
 	struct mbuf *m = NULL;
 	struct ifnet *ifp = ifa->ifa_ifp;
 
 	KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE,
 		("unexpected cmd %u", cmd));
 #ifdef SCTP
 	/*
 	 * notify the SCTP stack
 	 * this will only get called when an address is added/deleted
 	 * XXX pass the ifaddr struct instead of ifa->ifa_addr...
 	 */
 	sctp_addr_change(ifa, cmd);
 #endif /* SCTP */
 	if (route_cb.any_count == 0)
 		return;
 	for (pass = 1; pass < 3; pass++) {
 		bzero((caddr_t)&info, sizeof(info));
 		/* Interface-address message (RTM_NEWADDR / RTM_DELADDR). */
 		if ((cmd == RTM_ADD && pass == 1) ||
 		    (cmd == RTM_DELETE && pass == 2)) {
 			struct ifa_msghdr *ifam;
 			int ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;
 
 			info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
 			info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr;
 			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
 			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
 			if ((m = rt_msg1(ncmd, &info)) == NULL)
 				continue;
 			ifam = mtod(m, struct ifa_msghdr *);
 			ifam->ifam_index = ifp->if_index;
 			ifam->ifam_metric = ifa->ifa_metric;
 			ifam->ifam_flags = ifa->ifa_flags;
 			ifam->ifam_addrs = info.rti_addrs;
 		}
 		/* Route message (RTM_ADD / RTM_DELETE) for the associated route. */
 		if ((cmd == RTM_ADD && pass == 2) ||
 		    (cmd == RTM_DELETE && pass == 1)) {
 			struct rt_msghdr *rtm;
 
 			if (rt == NULL)
 				continue;
 			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
 			info.rti_info[RTAX_DST] = sa = rt_key(rt);
 			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
 			if ((m = rt_msg1(cmd, &info)) == NULL)
 				continue;
 			rtm = mtod(m, struct rt_msghdr *);
 			rtm->rtm_index = ifp->if_index;
 			rtm->rtm_flags |= rt->rt_flags;
 			rtm->rtm_errno = error;
 			rtm->rtm_addrs = info.rti_addrs;
 		}
 		/* 'sa' is the address this pass's message is about. */
 		rt_dispatch(m, sa);
 	}
 }
 
 /*
  * This is the analogue to the rt_newaddrmsg which performs the same
  * function but for multicast group memberships.  This is easier since
  * there is no route state to worry about.
  */
 void
 rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma)
 {
 	struct rt_addrinfo info;
 	struct mbuf *m = NULL;
 	struct ifnet *ifp = ifma->ifma_ifp;
 	struct ifma_msghdr *ifmam;
 
 	if (route_cb.any_count == 0)
 		return;
 
 	bzero((caddr_t)&info, sizeof(info));
 	info.rti_info[RTAX_IFA] = ifma->ifma_addr;
 	info.rti_info[RTAX_IFP] = ifp ? ifp->if_addr->ifa_addr : NULL;
 	/*
 	 * If a link-layer address is present, present it as a ``gateway''
 	 * (similarly to how ARP entries, e.g., are presented).
 	 */
 	info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr;
 	m = rt_msg1(cmd, &info);
 	if (m == NULL)
 		return;
 	ifmam = mtod(m, struct ifma_msghdr *);
 	KASSERT(ifp != NULL, ("%s: link-layer multicast address w/o ifp\n",
 	    __func__));
 	ifmam->ifmam_index = ifp->if_index;
 	ifmam->ifmam_addrs = info.rti_addrs;
 	rt_dispatch(m, ifma->ifma_addr);
 }
 
 /*
  * Build an if_announcemsghdr message of the given type for 'ifp', filling
  * in its index, name and 'what' code.  '*info' is zeroed as a side effect.
  * Returns NULL when there are no listeners or no mbuf is available.
  */
 static struct mbuf *
 rt_makeifannouncemsg(struct ifnet *ifp, int type, int what,
 	struct rt_addrinfo *info)
 {
 	struct if_announcemsghdr *hdr;
 	struct mbuf *msg;
 
 	if (route_cb.any_count == 0)
 		return NULL;
 
 	bzero((caddr_t)info, sizeof(*info));
 	msg = rt_msg1(type, info);
 	if (msg == NULL)
 		return msg;
 
 	hdr = mtod(msg, struct if_announcemsghdr *);
 	hdr->ifan_index = ifp->if_index;
 	strlcpy(hdr->ifan_name, ifp->if_xname, sizeof(hdr->ifan_name));
 	hdr->ifan_what = what;
 	return msg;
 }
 
 /*
  * This is called to generate routing socket messages indicating
  * IEEE80211 wireless events.
  * XXX we piggyback on the RTM_IFANNOUNCE msg format in a clumsy way.
  */
 /*
  * Send an RTM_IEEE80211 message for 'ifp' with event code 'what', followed
  * by 'data_len' bytes of event payload from 'data'.
  *
  * The payload is appended to the announce mbuf when it fits; otherwise one
  * extra mbuf is allocated for it.  The message is silently dropped if there
  * are no listeners, if an mbuf cannot be allocated, or if the payload does
  * not fit in that single extra mbuf.
  */
 void
 rt_ieee80211msg(struct ifnet *ifp, int what, void *data, size_t data_len)
 {
 	struct mbuf *m;
 	struct rt_addrinfo info;
 
 	m = rt_makeifannouncemsg(ifp, RTM_IEEE80211, what, &info);
 	if (m != NULL) {
 		/*
 		 * Append the ieee80211 data.  Try to stick it in the
 		 * mbuf containing the ifannounce msg; otherwise allocate
 		 * a new mbuf and append.
 		 *
 		 * NB: we assume m is a single mbuf.
 		 */
 		if (data_len > M_TRAILINGSPACE(m)) {
 			struct mbuf *n = m_get(M_NOWAIT, MT_DATA);
 			if (n == NULL) {
 				m_freem(m);
 				return;
 			}
 			/*
 			 * A plain mbuf has limited internal storage; refuse
 			 * payloads that would overrun it rather than copy
 			 * past the end of the buffer.
 			 */
 			if (data_len > M_TRAILINGSPACE(n)) {
 				m_free(n);
 				m_freem(m);
 				return;
 			}
 			bcopy(data, mtod(n, void *), data_len);
 			n->m_len = data_len;
 			m->m_next = n;
 		} else if (data_len > 0) {
 			bcopy(data, mtod(m, u_int8_t *) + m->m_len, data_len);
 			m->m_len += data_len;
 		}
 		/* Keep the packet and message lengths in step with the data. */
 		if (m->m_flags & M_PKTHDR)
 			m->m_pkthdr.len += data_len;
 		mtod(m, struct if_announcemsghdr *)->ifan_msglen += data_len;
 		rt_dispatch(m, NULL);
 	}
 }
 
 /*
  * This is called to generate routing socket messages indicating
  * network interface arrival and departure.
  */
 void
 rt_ifannouncemsg(struct ifnet *ifp, int what)
 {
 	struct mbuf *m;
 	struct rt_addrinfo info;
 
 	m = rt_makeifannouncemsg(ifp, RTM_IFANNOUNCE, what, &info);
 	if (m != NULL)
 		rt_dispatch(m, NULL);
 }
 
 /*
  * Queue a routing message to the NETISR_ROUTE netisr.  When "sa" is
  * non-NULL its address family is attached to the mbuf in a
  * PACKET_TAG_RTSOCKFAM tag.  The mbuf is consumed on every path.
  */
 static void
 rt_dispatch(struct mbuf *m, const struct sockaddr *sa)
 {
 	INIT_VNET_NET(curvnet);
 	struct m_tag *famtag;
 
 	/*
 	 * Preserve the family from the sockaddr, if any, in an m_tag for
 	 * use when injecting the mbuf into the routing socket buffer from
 	 * the netisr.
 	 */
 	if (sa != NULL) {
 		famtag = m_tag_get(PACKET_TAG_RTSOCKFAM,
 		    sizeof(unsigned short), M_NOWAIT);
 		if (famtag == NULL) {
 			m_freem(m);
 			return;
 		}
 		/* The tag payload sits immediately after the tag header. */
 		*(unsigned short *)(famtag + 1) = sa->sa_family;
 		m_tag_prepend(m, famtag);
 	}
 #ifdef VIMAGE
 	/* Without a loopback interface there is nothing to stamp: drop. */
 	if (!V_loif) {
 		m_freem(m);
 		return;
 	}
 	m->m_pkthdr.rcvif = V_loif;
 #endif
 	netisr_queue(NETISR_ROUTE, m);	/* mbuf is free'd on failure. */
 }
 
 /*
  * This is used in dumping the kernel table via sysctl().
  *
  * Tree-walk callback: "rn" is the routing entry being visited and "vw"
  * is the struct walkarg describing the request.  Entries filtered out by
  * the NET_RT_FLAGS mask or by the jail checks are skipped with a return
  * of 0.  Otherwise an RTM_GET message is sized/built through rt_msg2()
  * and, when an output buffer is available, patched up and copied out.
  *
  * Returns 0 or the error from SYSCTL_OUT().
  */
 static int
 sysctl_dumpentry(struct radix_node *rn, void *vw)
 {
 	struct walkarg *w = vw;
 	struct rtentry *rt = (struct rtentry *)rn;
 	int error = 0, size;
 	struct rt_addrinfo info;
 
 	if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
 		return 0;
 	/*
 	 * Non-host routes are hidden from jailed callers altogether;
 	 * host routes are shown only if prison_if() accepts the key.
 	 */
 	if ((rt->rt_flags & RTF_HOST) == 0
 	    ? jailed(w->w_req->td->td_ucred)
 	    : prison_if(w->w_req->td->td_ucred, rt_key(rt)) != 0)
 		return (0);
 	bzero((caddr_t)&info, sizeof(info));
 	info.rti_info[RTAX_DST] = rt_key(rt);
 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
 	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
 	info.rti_info[RTAX_GENMASK] = 0;
 	if (rt->rt_ifp) {
 		info.rti_info[RTAX_IFP] = rt->rt_ifp->if_addr->ifa_addr;
 		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
 		if (rt->rt_ifp->if_flags & IFF_POINTOPOINT)
 			info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
 	}
 	size = rt_msg2(RTM_GET, &info, NULL, w);
 	if (w->w_req && w->w_tmem) {
 		struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
 
 		rtm->rtm_flags = rt->rt_flags;
 		/*
 		 * XXX hack: rtm_fmask is overloaded to export the
 		 * per-route packet counter.
 		 */
 		rtm->rtm_fmask = rt->rt_rmx.rmx_pksent;
 		rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
 		/*
 		 * rt_ifp was treated as optional above; do the same here
 		 * instead of dereferencing a possibly NULL pointer.
 		 */
 		rtm->rtm_index = (rt->rt_ifp != NULL) ?
 		    rt->rt_ifp->if_index : 0;
 		rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;
 		rtm->rtm_addrs = info.rti_addrs;
 		error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
 	}
 	return (error);
 }
 
 /*
  * Walk V_ifnet and, for each interface, produce an RTM_IFINFO record
  * followed by one RTM_NEWADDR record for every address that comes after
  * ifp->if_addr on the interface's address list.
  *
  * af: if non-zero, only addresses whose sa_family equals af are reported.
  * w:  walk state; a non-zero w_arg restricts the walk to the interface
  *     with that if_index.
  *
  * Records are copied out with SYSCTL_OUT() only when both w->w_req and
  * w->w_tmem are set.  Returns 0, or the first error from SYSCTL_OUT().
  */
 static int
 sysctl_iflist(int af, struct walkarg *w)
 {
 	INIT_VNET_NET(curvnet);
 	struct ifnet *ifp;
 	struct ifaddr *ifa;
 	struct rt_addrinfo info;
 	int len, error = 0;
 
 	bzero((caddr_t)&info, sizeof(info));
 	IFNET_RLOCK();
 	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 		if (w->w_arg && w->w_arg != ifp->if_index)
 			continue;
 		ifa = ifp->if_addr;
 		info.rti_info[RTAX_IFP] = ifa->ifa_addr;
 		len = rt_msg2(RTM_IFINFO, &info, NULL, w);
 		/* RTAX_IFP belongs only to the IFINFO record; clear it. */
 		info.rti_info[RTAX_IFP] = NULL;
 		if (w->w_req && w->w_tmem) {
 			struct if_msghdr *ifm;
 
 			/* Fill in the header fields of the staged message. */
 			ifm = (struct if_msghdr *)w->w_tmem;
 			ifm->ifm_index = ifp->if_index;
 			ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
 			ifm->ifm_data = ifp->if_data;
 			ifm->ifm_addrs = info.rti_addrs;
 			error = SYSCTL_OUT(w->w_req,(caddr_t)ifm, len);
 			if (error)
 				goto done;
 		}
 		/* Continue from the entry after if_addr. */
 		while ((ifa = TAILQ_NEXT(ifa, ifa_link)) != NULL) {
 			if (af && af != ifa->ifa_addr->sa_family)
 				continue;
 			/* Skip addresses the caller's credentials may not see. */
 			if (prison_if(w->w_req->td->td_ucred,
 			    ifa->ifa_addr) != 0)
 				continue;
 			info.rti_info[RTAX_IFA] = ifa->ifa_addr;
 			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
 			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
 			len = rt_msg2(RTM_NEWADDR, &info, NULL, w);
 			if (w->w_req && w->w_tmem) {
 				struct ifa_msghdr *ifam;
 
 				ifam = (struct ifa_msghdr *)w->w_tmem;
 				ifam->ifam_index = ifa->ifa_ifp->if_index;
 				ifam->ifam_flags = ifa->ifa_flags;
 				ifam->ifam_metric = ifa->ifa_metric;
 				ifam->ifam_addrs = info.rti_addrs;
 				error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
 				if (error)
 					goto done;
 			}
 		}
 		/* Reset the per-address slots before the next interface. */
 		info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
 			info.rti_info[RTAX_BRD] = NULL;
 	}
 done:
 	IFNET_RUNLOCK();
 	return (error);
 }
 
 /*
  * Walk V_ifnet and produce one RTM_NEWMADDR record for every multicast
  * address on each interface's if_multiaddrs list.
  *
  * af: if non-zero, only memberships whose sa_family equals af are
  *     reported.
  * w:  walk state; a non-zero w_arg restricts the walk to the interface
  *     with that if_index.
  *
  * Records are copied out with SYSCTL_OUT() only when both w->w_req and
  * w->w_tmem are set.  Returns 0, or the first error from SYSCTL_OUT().
  */
 static int
 sysctl_ifmalist(int af, struct walkarg *w)
 {
 	INIT_VNET_NET(curvnet);
 	struct ifnet *ifp;
 	struct ifmultiaddr *ifma;
 	struct	rt_addrinfo info;
 	int	len, error = 0;
 	struct ifaddr *ifa;
 
 	bzero((caddr_t)&info, sizeof(info));
 	IFNET_RLOCK();
 	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 		if (w->w_arg && w->w_arg != ifp->if_index)
 			continue;
 		ifa = ifp->if_addr;
 		info.rti_info[RTAX_IFP] = ifa ? ifa->ifa_addr : NULL;
 		/* The multicast list is walked under the address lock. */
 		IF_ADDR_LOCK(ifp);
 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 			if (af && af != ifma->ifma_addr->sa_family)
 				continue;
 			/* Skip addresses the caller's credentials may not see. */
 			if (prison_if(w->w_req->td->td_ucred,
 			    ifma->ifma_addr) != 0)
 				continue;
 			info.rti_info[RTAX_IFA] = ifma->ifma_addr;
 			/*
 			 * Report the link-layer address as the "gateway",
 			 * except when the membership itself is AF_LINK.
 			 */
 			info.rti_info[RTAX_GATEWAY] =
 			    (ifma->ifma_addr->sa_family != AF_LINK) ?
 			    ifma->ifma_lladdr : NULL;
 			len = rt_msg2(RTM_NEWMADDR, &info, NULL, w);
 			if (w->w_req && w->w_tmem) {
 				struct ifma_msghdr *ifmam;
 
 				ifmam = (struct ifma_msghdr *)w->w_tmem;
 				ifmam->ifmam_index = ifma->ifma_ifp->if_index;
 				ifmam->ifmam_flags = 0;
 				ifmam->ifmam_addrs = info.rti_addrs;
 				error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
 				if (error) {
 					/* Drop the inner lock before bailing out. */
 					IF_ADDR_UNLOCK(ifp);
 					goto done;
 				}
 			}
 		}
 		IF_ADDR_UNLOCK(ifp);
 	}
 done:
 	IFNET_RUNLOCK();
 	return (error);
 }
 
 /*
  * sysctl handler for the PF_ROUTE node.  After the first name component
  * is skipped, exactly three integers are expected:
  *   name[0]  address family (0 selects all families for table dumps),
  *   name[1]  operation (NET_RT_DUMP, NET_RT_FLAGS, NET_RT_IFLIST or
  *            NET_RT_IFMALIST),
  *   name[2]  operation argument (flag mask or interface index).
  *
  * Returns EPERM for write attempts, EISDIR/ENOTDIR for a wrong number of
  * name components, EINVAL for an address family above AF_MAX, otherwise
  * the result of the selected operation.
  *
  * NOTE(review): an operation value not handled by the switch returns the
  * (zero) result of sysctl_wire_old_buffer(), not EINVAL, because the
  * initial value of "error" has been overwritten by then.
  */
 static int
 sysctl_rtsock(SYSCTL_HANDLER_ARGS)
 {
 	INIT_VNET_NET(curvnet);
 	int	*name = (int *)arg1;
 	u_int	namelen = arg2;
 	struct radix_node_head *rnh;
 	int	i, lim, error = EINVAL;
 	u_char	af;
 	struct	walkarg w;
 
 	/* Skip the first name component. */
 	name ++;
 	namelen--;
 	/* This node is read-only. */
 	if (req->newptr)
 		return (EPERM);
 	if (namelen != 3)
 		return ((namelen < 3) ? EISDIR : ENOTDIR);
 	/* NB: name[0] is narrowed to u_char before the range check. */
 	af = name[0];
 	if (af > AF_MAX)
 		return (EINVAL);
 	bzero(&w, sizeof(w));
 	w.w_op = name[1];
 	w.w_arg = name[2];
 	w.w_req = req;
 
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error)
 		return (error);
 	switch (w.w_op) {
 
 	case NET_RT_DUMP:
 	case NET_RT_FLAGS:
 		if (af == 0) {			/* dump all tables */
 			i = 1;
 			lim = AF_MAX;
 		} else				/* dump only one table */
 			i = lim = af;
 
 		/*
 		 * take care of llinfo entries, the caller must
 		 * specify an AF
 		 */
 		if (w.w_op == NET_RT_FLAGS &&
 		    (w.w_arg == 0 || w.w_arg & RTF_LLINFO)) {
 			if (af != 0)
 				error = lltable_sysctl_dumparp(af, w.w_req);
 			else
 				error = EINVAL;
 			break;
 		}
 		/*
 		 * take care of routing entries
 		 */
 		/* Tables are taken from the calling process's FIB. */
 		for (error = 0; error == 0 && i <= lim; i++)
 			if ((rnh = V_rt_tables[req->td->td_proc->p_fibnum][i]) != NULL) {
 				RADIX_NODE_HEAD_LOCK(rnh); 
 			    	error = rnh->rnh_walktree(rnh,
 				    sysctl_dumpentry, &w);
 				RADIX_NODE_HEAD_UNLOCK(rnh);
 			} else if (af != 0)
 				/* A specific family was requested but has no table. */
 				error = EAFNOSUPPORT;
 		break;
 
 	case NET_RT_IFLIST:
 		error = sysctl_iflist(af, &w);
 		break;
 
 	case NET_RT_IFMALIST:
 		error = sysctl_ifmalist(af, &w);
 		break;
 	}
 	/* Release the staging buffer if one was attached to the walk state. */
 	if (w.w_tmem)
 		free(w.w_tmem, M_RTABLE);
 	return (error);
 }
 
 SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, "");
 
 /*
  * Definitions of protocols supported in the ROUTE domain.
  */
 
 static struct domain routedomain;		/* or at least forward */
 
 /* The routing domain offers a single SOCK_RAW protocol entry. */
 static struct protosw routesw[] = {
 {
 	.pr_type =		SOCK_RAW,
 	.pr_domain =		&routedomain,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
 	.pr_output =		route_output,
 	.pr_ctlinput =		raw_ctlinput,
 	.pr_init =		raw_init,
 	.pr_usrreqs =		&route_usrreqs
 }
 };
 
 /*
  * Domain descriptor for PF_ROUTE.  dom_protoswNPROTOSW points one past
  * the last element of routesw[].
  */
 static struct domain routedomain = {
 	.dom_family =		PF_ROUTE,
 	.dom_name =		 "route",
 	.dom_protosw =		routesw,
 	.dom_protoswNPROTOSW =	&routesw[sizeof(routesw)/sizeof(routesw[0])]
 };
 
 DOMAIN_SET(route);
Index: projects/pnet/sys/netatalk/ddp_usrreq.c
===================================================================
--- projects/pnet/sys/netatalk/ddp_usrreq.c	(revision 193105)
+++ projects/pnet/sys/netatalk/ddp_usrreq.c	(revision 193106)
@@ -1,333 +1,333 @@
 /*-
  * Copyright (c) 2004-2009 Robert N. M. Watson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * Copyright (c) 1990, 1994 Regents of The University of Michigan.
  * All Rights Reserved.
  *
  * Permission to use, copy, modify, and distribute this software and
  * its documentation for any purpose and without fee is hereby granted,
  * provided that the above copyright notice appears in all copies and
  * that both that copyright notice and this permission notice appear
  * in supporting documentation, and that the name of The University
  * of Michigan not be used in advertising or publicity pertaining to
  * distribution of the software without specific, written prior
  * permission. This software is supplied as is without expressed or
  * implied warranties of any kind.
  *
  * This product includes software developed by the University of
  * California, Berkeley and its contributors.
  *
  *	Research Systems Unix Group
  *	The University of Michigan
  *	c/o Wesley Craig
  *	535 W. William Street
  *	Ann Arbor, Michigan
  *	+1-313-764-2278
  *	netatalk@umich.edu
  *
  * $FreeBSD$
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/protosw.h>
 #include <net/if.h>
 #include <net/route.h>
 #include <net/netisr.h>
 
 #include <netatalk/at.h>
 #include <netatalk/at_var.h>
 #include <netatalk/ddp_var.h>
 #include <netatalk/ddp_pcb.h>
 #include <netatalk/at_extern.h>
 
 static u_long	ddp_sendspace = DDP_MAXSZ; /* Max ddp size + 1 (ddp_type) */
 static u_long	ddp_recvspace = 10 * (587 + sizeof(struct sockaddr_at));
 
 /*
  * netisr handler descriptors for the three AppleTalk input paths.  Each
  * is registered from ddp_init(); all use an IFQ_MAXLEN queue limit and
  * the NETISR_POLICY_SOURCE dispatch policy.
  */
 static const struct netisr_handler atalk1_nh = {
 	.nh_name = "atalk1",
 	.nh_handler = at1intr,
 	.nh_proto = NETISR_ATALK1,
 	.nh_qlimit = IFQ_MAXLEN,
 	.nh_policy = NETISR_POLICY_SOURCE,
 };
 
 static const struct netisr_handler atalk2_nh = {
 	.nh_name = "atalk2",
 	.nh_handler = at2intr,
 	.nh_proto = NETISR_ATALK2,
 	.nh_qlimit = IFQ_MAXLEN,
 	.nh_policy = NETISR_POLICY_SOURCE,
 };
 
 static const struct netisr_handler aarp_nh = {
 	.nh_name = "aarp",
 	.nh_handler = aarpintr,
 	.nh_proto = NETISR_AARP,
 	.nh_qlimit = IFQ_MAXLEN,
 	.nh_policy = NETISR_POLICY_SOURCE,
 };
 
 /*
  * pru_attach: reserve socket buffer space and allocate a DDP pcb for a
  * socket that does not have one yet.  Returns 0 or the error from
  * soreserve()/at_pcballoc().
  */
 static int
 ddp_attach(struct socket *so, int proto, struct thread *td)
 {
 	int rc;
 
 	KASSERT(sotoddpcb(so) == NULL, ("ddp_attach: ddp != NULL"));
 
 	/*
 	 * Buffer space is reserved before the pcb exists so that it is
 	 * already in place at first use.
 	 */
 	rc = soreserve(so, ddp_sendspace, ddp_recvspace);
 	if (rc != 0)
 		return (rc);
 
 	DDP_LIST_XLOCK();
 	rc = at_pcballoc(so);
 	DDP_LIST_XUNLOCK();
 	return (rc);
 }
 
 /*
  * pru_detach: tear down the DDP pcb attached to the socket.
  *
  * NOTE(review): the per-pcb lock taken here is not released in this
  * function; presumably at_pcbdetach() disposes of the pcb together with
  * its lock -- confirm against at_pcbdetach().
  */
 static void
 ddp_detach(struct socket *so)
 {
 	struct ddpcb *ddp;
 	
 	ddp = sotoddpcb(so);
 	KASSERT(ddp != NULL, ("ddp_detach: ddp == NULL"));
 
 	DDP_LIST_XLOCK();
 	DDP_LOCK(ddp);
 	at_pcbdetach(so, ddp);
 	DDP_LIST_XUNLOCK();
 }
 
 /*
  * pru_bind: set the local address of the socket's DDP pcb to "nam".
  * Runs with both the pcb list lock and the pcb lock held.  Returns the
  * result of at_pcbsetaddr().
  */
 static int
 ddp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct ddpcb *pcb = sotoddpcb(so);
 	int rc;
 
 	KASSERT(pcb != NULL, ("ddp_bind: ddp == NULL"));
 
 	DDP_LIST_XLOCK();
 	DDP_LOCK(pcb);
 	rc = at_pcbsetaddr(pcb, nam, td);
 	DDP_UNLOCK(pcb);
 	DDP_LIST_XUNLOCK();
 	return (rc);
 }
     
 /*
  * pru_connect: connect the socket's DDP pcb to "nam".  Returns EISCONN
  * if a foreign port is already set, otherwise the result of
  * at_pcbconnect(); the socket is marked connected only on success.
  */
 static int
 ddp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct ddpcb *pcb = sotoddpcb(so);
 	int rc;
 
 	KASSERT(pcb != NULL, ("ddp_connect: ddp == NULL"));
 
 	DDP_LIST_XLOCK();
 	DDP_LOCK(pcb);
 	if (pcb->ddp_fsat.sat_port != ATADDR_ANYPORT)
 		rc = EISCONN;
 	else
 		rc = at_pcbconnect(pcb, nam, td);
 	DDP_UNLOCK(pcb);
 	DDP_LIST_XUNLOCK();
 
 	if (rc == 0)
 		soisconnected(so);
 	return (rc);
 }
 
 /*
  * pru_disconnect: drop the pcb's foreign address.  Returns ENOTCONN if
  * the foreign node is already ATADDR_ANYNODE, 0 otherwise.
  */
 static int
 ddp_disconnect(struct socket *so)
 {
 	struct ddpcb *pcb = sotoddpcb(so);
 	int connected;
 
 	KASSERT(pcb != NULL, ("ddp_disconnect: ddp == NULL"));
 
 	DDP_LOCK(pcb);
 	connected = (pcb->ddp_fsat.sat_addr.s_node != ATADDR_ANYNODE);
 	if (connected) {
 		at_pcbdisconnect(pcb);
 		pcb->ddp_fsat.sat_addr.s_node = ATADDR_ANYNODE;
 	}
 	DDP_UNLOCK(pcb);
 
 	if (!connected)
 		return (ENOTCONN);
 	soisdisconnected(so);
 	return (0);
 }
 
 /*
  * pru_shutdown: mark the socket as unable to send any more data.
  * Always returns 0.
  */
 static int
 ddp_shutdown(struct socket *so)
 {
 
 	KASSERT(sotoddpcb(so) != NULL, ("ddp_shutdown: ddp == NULL"));
 
 	socantsendmore(so);
 	return (0);
 }
 
 /*
  * pru_send: transmit "m" on the socket.
  *
  * With an explicit destination ("addr" != NULL) the pcb is temporarily
  * connected to that address, the datagram is sent with ddp_output(), and
  * the pcb is disconnected again; this fails with EISCONN if the pcb
  * already has a foreign port.  Without a destination the pcb must
  * already be connected, else ENOTCONN is returned.
  *
  * Non-empty control data is rejected with EINVAL.
  *
  * NOTE(review): on the EINVAL, EISCONN, ENOTCONN and failed-connect
  * paths neither "m" nor "control" is freed in this function -- confirm
  * who owns them on error.
  */
 static int
 ddp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
     struct mbuf *control, struct thread *td)
 {
 	struct ddpcb *ddp;
 	int error = 0;
 	
 	ddp = sotoddpcb(so);
 	KASSERT(ddp != NULL, ("ddp_send: ddp == NULL"));
 
     	if (control && control->m_len)
 		return (EINVAL);
 
 	if (addr != NULL) {
 		/* The temporary connect needs the list lock as well. */
 		DDP_LIST_XLOCK();
 		DDP_LOCK(ddp);
 		if (ddp->ddp_fsat.sat_port != ATADDR_ANYPORT) {
 			error = EISCONN;
 			goto out;
 		}
 
 		error = at_pcbconnect(ddp, addr, td);
 		if (error == 0) {
 			error = ddp_output(m, so);
 			/* Undo the temporary connection. */
 			at_pcbdisconnect(ddp);
 		}
 out:
 		DDP_UNLOCK(ddp);
 		DDP_LIST_XUNLOCK();
 	} else {
 		DDP_LOCK(ddp);
 		if (ddp->ddp_fsat.sat_port == ATADDR_ANYPORT)
 			error = ENOTCONN;
 		else
 			error = ddp_output(m, so);
 		DDP_UNLOCK(ddp);
 	}
 	return (error);
 }
 
 /*
  * pru_abort: disconnect the pcb and mark the socket disconnected.
  *
  * XXXRW: This is never called because we only invoke abort on stream
  * protocols.
  */
 static void
 ddp_abort(struct socket *so)
 {
 	struct ddpcb *pcb = sotoddpcb(so);
 
 	KASSERT(pcb != NULL, ("ddp_abort: ddp == NULL"));
 
 	DDP_LOCK(pcb);
 	at_pcbdisconnect(pcb);
 	DDP_UNLOCK(pcb);
 	soisdisconnected(so);
 }
 
 /*
  * pru_close: disconnect the pcb under its lock, then mark the socket
  * disconnected.
  */
 static void
 ddp_close(struct socket *so)
 {
 	struct ddpcb *pcb = sotoddpcb(so);
 
 	KASSERT(pcb != NULL, ("ddp_close: ddp == NULL"));
 
 	DDP_LOCK(pcb);
 	at_pcbdisconnect(pcb);
 	DDP_UNLOCK(pcb);
 	soisdisconnected(so);
 }
 
 /*
  * Protocol initialisation: set up the pcb list lock and register the
  * three AppleTalk netisr handlers.
  */
 void 
 ddp_init(void)
 {
 
 	DDP_LIST_LOCK_INIT();
-	netisr2_register(&atalk1_nh);
-	netisr2_register(&atalk2_nh);
-	netisr2_register(&aarp_nh);
+	netisr_register(&atalk1_nh);
+	netisr_register(&atalk2_nh);
+	netisr_register(&aarp_nh);
 }
 
 /*
  * Disabled teardown counterpart of ddp_init(): detach every pcb on
  * ddpcb_list and destroy the list lock.
  *
  * NOTE(review): before re-enabling, check the "struct ddpcp" spelling
  * (the rest of the file uses struct ddpcb) and whether reading
  * ddp->ddp_next is still valid after at_pcbdetach() has run on ddp.
  */
 #if 0
 static void 
 ddp_clean(void)
 {
 	struct ddpcp	*ddp;
 
 	for (ddp = ddpcb_list; ddp != NULL; ddp = ddp->ddp_next)
 		at_pcbdetach(ddp->ddp_socket, ddp);
 	DDP_LIST_LOCK_DESTROY();
 }
 #endif
 
 /*
  * pru_peeraddr: not supported for DDP sockets; always returns
  * EOPNOTSUPP and leaves *nam untouched.
  */
 static int
 at_getpeeraddr(struct socket *so, struct sockaddr **nam)
 {
 
 	return (EOPNOTSUPP);
 }
 
 /*
  * pru_sockaddr: hand the pcb and "nam" to at_sockaddr() while holding
  * the pcb lock.  Always returns 0.
  */
 static int
 at_getsockaddr(struct socket *so, struct sockaddr **nam)
 {
 	struct ddpcb *pcb = sotoddpcb(so);
 
 	KASSERT(pcb != NULL, ("at_getsockaddr: ddp == NULL"));
 
 	DDP_LOCK(pcb);
 	at_sockaddr(pcb, nam);
 	DDP_UNLOCK(pcb);
 	return (0);
 }
 
 /*
  * User-request dispatch table for DDP sockets.  Entries not listed here
  * are left at whatever default the designated initializer provides.
  */
 struct pr_usrreqs ddp_usrreqs = {
 	.pru_abort =		ddp_abort,
 	.pru_attach =		ddp_attach,
 	.pru_bind =		ddp_bind,
 	.pru_connect =		ddp_connect,
 	.pru_control =		at_control,
 	.pru_detach =		ddp_detach,
 	.pru_disconnect =	ddp_disconnect,
 	.pru_peeraddr =		at_getpeeraddr,
 	.pru_send =		ddp_send,
 	.pru_shutdown =		ddp_shutdown,
 	.pru_sockaddr =		at_getsockaddr,
 	.pru_close =		ddp_close,
 };
Index: projects/pnet/sys/netinet/if_ether.c
===================================================================
--- projects/pnet/sys/netinet/if_ether.c	(revision 193105)
+++ projects/pnet/sys/netinet/if_ether.c	(revision 193106)
@@ -1,834 +1,834 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)if_ether.c	8.1 (Berkeley) 6/10/93
  */
 
 /*
  * Ethernet address resolution protocol.
  * TODO:
  *	add "inuse/lock" bit (or ref. count) along with valid bit
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_route.h"
 #include "opt_mac.h"
 #include "opt_carp.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/queue.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 #include <sys/mbuf.h>
 #include <sys/malloc.h>
 #include <sys/proc.h>
 #include <sys/socket.h>
 #include <sys/syslog.h>
 #include <sys/vimage.h>
 
 #include <net/if.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/route.h>
 #include <net/netisr.h>
 #include <net/if_llc.h>
 #include <net/ethernet.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <net/if_llatbl.h>
 #include <netinet/if_ether.h>
 #include <netinet/vinet.h>
 
 #include <net/if_arc.h>
 #include <net/iso88025.h>
 
 #ifdef DEV_CARP
 #include <netinet/ip_carp.h>
 #endif
 
 #include <security/mac/mac_framework.h>
 
 #define SIN(s) ((struct sockaddr_in *)s)
 #define SDL(s) ((struct sockaddr_dl *)s)
 
 SYSCTL_DECL(_net_link_ether);
 SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW, 0, "");
 
 /* timer values */
 #ifdef VIMAGE_GLOBALS
 static int	arpt_keep; /* once resolved, good for 20 more minutes */
 static int	arp_maxtries;
 int	useloopback; /* use loopback interface for local traffic */
 static int	arp_proxyall;
 #endif
 
 SYSCTL_V_INT(V_NET, vnet_inet, _net_link_ether_inet, OID_AUTO, max_age,
     CTLFLAG_RW, arpt_keep, 0, "ARP entry lifetime in seconds");
 
 SYSCTL_V_INT(V_NET, vnet_inet, _net_link_ether_inet, OID_AUTO, maxtries,
 	CTLFLAG_RW, arp_maxtries, 0,
 	"ARP resolution attempts before returning error");
 SYSCTL_V_INT(V_NET, vnet_inet, _net_link_ether_inet, OID_AUTO, useloopback,
 	CTLFLAG_RW, useloopback, 0,
 	"Use the loopback interface for local traffic");
 SYSCTL_V_INT(V_NET, vnet_inet, _net_link_ether_inet, OID_AUTO, proxyall,
 	CTLFLAG_RW, arp_proxyall, 0,
 	"Enable proxy ARP for all suitable requests");
 
 static void	arp_init(void);
 static int	arp_iattach(const void *);
 void		arprequest(struct ifnet *,
 			struct in_addr *, struct in_addr *, u_char *);
 static void	arpintr(struct mbuf *);
 static void	arptimer(void *);
 #ifdef INET
 static void	in_arpinput(struct mbuf *);
 #endif
 
 /*
  * netisr handler descriptor for ARP input: packets are delivered to
  * arpintr() with a queue limit of 50.
  */
 static const struct netisr_handler arp_nh = {
 	.nh_name = "arp",
 	.nh_handler = arpintr,
 	.nh_proto = NETISR_ARP,
 	.nh_qlimit = 50,
 	.nh_policy = NETISR_POLICY_SOURCE,
 };
 
 #ifndef VIMAGE_GLOBALS
 /*
  * vnet module description for ARP: depends on VNET_MOD_INET and uses
  * arp_iattach() as its attach hook.
  */
 static const vnet_modinfo_t vnet_arp_modinfo = {
 	.vmi_id		= VNET_MOD_ARP,
 	.vmi_name	= "arp",
 	.vmi_dependson	= VNET_MOD_INET,
 	.vmi_iattach	= arp_iattach
 };
 #endif /* !VIMAGE_GLOBALS */
 
 #ifdef AF_INET
 void arp_ifscrub(struct ifnet *ifp, uint32_t addr);
 
 /*
  * called by in_ifscrub to remove entry from the table when
  * the interface goes away
  */
 void
 arp_ifscrub(struct ifnet *ifp, uint32_t addr)
 {
 	struct sockaddr_in key;
 
 	/* Build an AF_INET lookup key for the address being removed. */
 	bzero((void *)&key, sizeof(key));
 	key.sin_family = AF_INET;
 	key.sin_len    = sizeof(key);
 	key.sin_addr.s_addr = addr;
 
 	CURVNET_SET(ifp->if_vnet);
 	IF_AFDATA_LOCK(ifp);
 	/* LLE_DELETE turns the lookup into a removal of the entry. */
 	lla_lookup(LLTABLE(ifp), (LLE_DELETE | LLE_IFADDR),
 	    (struct sockaddr *)&key);
 	IF_AFDATA_UNLOCK(ifp);
 	CURVNET_RESTORE();
 }
 #endif
 
 /*
  * Timeout routine.  Age arp_tab entries periodically.
  */
 static void
 arptimer(void *arg)
 {
 	struct ifnet *ifp;
 	struct llentry   *lle = (struct llentry *)arg;
 
 	if (lle == NULL) {
 		panic("%s: NULL entry!\n", __func__);
 		return;
 	}
 	ifp = lle->lle_tbl->llt_ifp;
 	IF_AFDATA_LOCK(ifp);
 	LLE_WLOCK(lle);
 	if (((lle->la_flags & LLE_DELETED)
 		|| (time_second >= lle->la_expire))
 	    && (!callout_pending(&lle->la_timer) &&
 		callout_active(&lle->la_timer)))
 		(void) llentry_free(lle);
 	else {
 		/*
 		 * Still valid, just drop our reference
 		 */
 		LLE_FREE_LOCKED(lle);
 	}
 	IF_AFDATA_UNLOCK(ifp);
 }
 
 /*
  * Broadcast an ARP request. Caller specifies:
  *	- arp header source ip address
  *	- arp header target ip address
  *	- arp header source ethernet address
  *
  * If "sip" is NULL a source address is picked from the AF_INET addresses
  * configured on "ifp".  The request is handed to ifp->if_output with an
  * AF_ARP sockaddr and M_BCAST set.  Nothing is sent if no source address
  * can be found or no mbuf is available.
  */
 void
 arprequest(struct ifnet *ifp, struct in_addr *sip, struct in_addr  *tip,
     u_char *enaddr)
 {
 	struct mbuf *m;
 	struct arphdr *ah;
 	struct sockaddr sa;
 
 	if (sip == NULL) {
 		/* XXX don't believe this can happen (or explain why) */
 		/*
 		 * The caller did not supply a source address, try to find
 		 * a compatible one among those assigned to this interface.
 		 */
 		struct ifaddr *ifa;
 
 		/*
 		 * NB: sip is assigned before the subnet test, so when no
 		 * address shares a subnet with tip the last AF_INET address
 		 * visited is used; sip stays NULL only if the interface has
 		 * no AF_INET address at all.
 		 */
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			if (!ifa->ifa_addr ||
 			    ifa->ifa_addr->sa_family != AF_INET)
 				continue;
 			sip = &SIN(ifa->ifa_addr)->sin_addr;
 			if (0 == ((sip->s_addr ^ tip->s_addr) &
 			    SIN(ifa->ifa_netmask)->sin_addr.s_addr) )
 				break;  /* found it. */
 		}
 		if (sip == NULL) {  
 			printf("%s: cannot find matching address\n", __func__);
 			return;
 		}
 	}
 
 	if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
 		return;
 	/* Fixed header plus two protocol and two hardware addresses. */
 	m->m_len = sizeof(*ah) + 2*sizeof(struct in_addr) +
 		2*ifp->if_data.ifi_addrlen;
 	m->m_pkthdr.len = m->m_len;
 	MH_ALIGN(m, m->m_len);
 	ah = mtod(m, struct arphdr *);
 	bzero((caddr_t)ah, m->m_len);
 #ifdef MAC
 	mac_netinet_arp_send(ifp, m);
 #endif
 	/* ar_hrd and the target hardware address stay zero from the bzero. */
 	ah->ar_pro = htons(ETHERTYPE_IP);
 	ah->ar_hln = ifp->if_addrlen;		/* hardware address length */
 	ah->ar_pln = sizeof(struct in_addr);	/* protocol address length */
 	ah->ar_op = htons(ARPOP_REQUEST);
 	bcopy((caddr_t)enaddr, (caddr_t)ar_sha(ah), ah->ar_hln);
 	bcopy((caddr_t)sip, (caddr_t)ar_spa(ah), ah->ar_pln);
 	bcopy((caddr_t)tip, (caddr_t)ar_tpa(ah), ah->ar_pln);
 	/* Only the family and length of sa are initialised. */
 	sa.sa_family = AF_ARP;
 	sa.sa_len = 2;
 	m->m_flags |= M_BCAST;
 	(*ifp->if_output)(ifp, m, &sa, NULL);
 }
 
 /*
  * Resolve an IP address into an ethernet address.
  * On input:
  *    ifp is the interface we use
  *    rt0 is the route to the final destination (possibly useless,
  *        and tolerated as NULL)
  *    m is the mbuf. May be NULL if we don't have a packet.
  *    dst is the next hop,
  *    desten is where we want the address.
  *    lle receives the matching entry on success, NULL otherwise.
  *
  * On success, desten is filled in and the function returns 0;
  * If the packet must be held pending resolution, we return EWOULDBLOCK
  * On other errors, we return the corresponding error code.
  * Note that m_freem() handles NULL.
  */
 int
 arpresolve(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m,
 	struct sockaddr *dst, u_char *desten, struct llentry **lle)
 {
 	INIT_VNET_INET(ifp->if_vnet);
 	struct llentry *la = 0;
 	u_int flags = 0;
 	int error, renew;
 
 	*lle = NULL;
 	if (m != NULL) {
 		if (m->m_flags & M_BCAST) {
 			/* broadcast */
 			(void)memcpy(desten,
 			    ifp->if_broadcastaddr, ifp->if_addrlen);
 			return (0);
 		}
 		if (m->m_flags & M_MCAST && ifp->if_type != IFT_ARCNET) {
 			/* multicast */
 			ETHER_MAP_IP_MULTICAST(&SIN(dst)->sin_addr, desten);
 			return (0);
 		}
 	}
 	/* XXXXX
 	 */
 retry:
 	/*
 	 * First pass looks the entry up shared; "flags" accumulates
 	 * LLE_EXCLUSIVE / LLE_CREATE for later passes.
 	 */
 	IF_AFDATA_RLOCK(ifp);
 	la = lla_lookup(LLTABLE(ifp), flags, dst);
 	IF_AFDATA_RUNLOCK(ifp);
 	if ((la == NULL) && ((flags & LLE_EXCLUSIVE) == 0)
 	    && ((ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) == 0)) {
 		/* No entry yet and ARP is allowed: create one, exclusively. */
 		flags |= (LLE_CREATE | LLE_EXCLUSIVE);
 		IF_AFDATA_WLOCK(ifp);
 		la = lla_lookup(LLTABLE(ifp), flags, dst);
 		IF_AFDATA_WUNLOCK(ifp);
 	}
 	if (la == NULL) {
 		if (flags & LLE_CREATE)
 			log(LOG_DEBUG,
 			    "arpresolve: can't allocate llinfo for %s\n",
 			    inet_ntoa(SIN(dst)->sin_addr));
 		m_freem(m);
 		return (EINVAL);
 	}
 
 	if ((la->la_flags & LLE_VALID) &&
 	    ((la->la_flags & LLE_STATIC) || la->la_expire > time_uptime)) {
 		bcopy(&la->ll_addr, desten, ifp->if_addrlen);
 		/*
 		 * If entry has an expiry time and it is approaching,
 		 * see if we need to send an ARP request within this
 		 * arpt_down interval.
 		 */
 		if (!(la->la_flags & LLE_STATIC) &&
 		    time_uptime + la->la_preempt > la->la_expire) {
 			arprequest(ifp, NULL,
 			    &SIN(dst)->sin_addr, IF_LLADDR(ifp));
 
 			la->la_preempt--;
 		}
 
 		*lle = la;
 		error = 0;
 		goto done;
 	}
 
 	if (la->la_flags & LLE_STATIC) {   /* should not happen! */
 		log(LOG_DEBUG, "arpresolve: ouch, empty static llinfo for %s\n",
 		    inet_ntoa(SIN(dst)->sin_addr));
 		m_freem(m);
 		error = EINVAL;
 		goto done;
 	}
 
 	renew = (la->la_asked == 0 || la->la_expire != time_uptime);
 	if ((renew || m != NULL) && (flags & LLE_EXCLUSIVE) == 0) {
 		/* The entry is about to be modified: redo the lookup exclusive. */
 		flags |= LLE_EXCLUSIVE;
 		LLE_RUNLOCK(la);
 		goto retry;
 	}
 	/*
 	 * There is an arptab entry, but no ethernet address
 	 * response yet.  Replace the held mbuf with this
 	 * latest one.
 	 */
 	if (m != NULL) {
 		if (la->la_hold != NULL)
 			m_freem(la->la_hold);
 		la->la_hold = m;
 		if (renew == 0 && (flags & LLE_EXCLUSIVE)) {
 			flags &= ~LLE_EXCLUSIVE;
 			LLE_DOWNGRADE(la);
 		}
 
 	}
 	/*
 	 * Return EWOULDBLOCK if we have tried less than arp_maxtries. It
 	 * will be masked by ether_output(). Return EHOSTDOWN/EHOSTUNREACH
 	 * if we have already sent arp_maxtries ARP requests. Retransmit the
 	 * ARP request, but not faster than one request per second.
 	 */
 	if (la->la_asked < V_arp_maxtries)
 		error = EWOULDBLOCK;	/* First request. */
 	else {
 		/*
 		 * rt0 is optional; a missing route is treated like a
 		 * non-gateway route rather than dereferenced.
 		 */
 		error = (rt0 != NULL && (rt0->rt_flags & RTF_GATEWAY)) ?
 		    EHOSTDOWN : EHOSTUNREACH;
 	}
 
 	if (renew) {
 		/* Extra reference is held on behalf of the timer callout. */
 		LLE_ADDREF(la);
 		la->la_expire = time_uptime;
 		callout_reset(&la->la_timer, hz, arptimer, la);
 		la->la_asked++;
 		LLE_WUNLOCK(la);
 		arprequest(ifp, NULL, &SIN(dst)->sin_addr,
 		    IF_LLADDR(ifp));
 		return (error);
 	}
 done:
 	if (flags & LLE_EXCLUSIVE)
 		LLE_WUNLOCK(la);
 	else
 		LLE_RUNLOCK(la);
 	return (error);
 }
 
 /*
  * netisr entry point for ARP packets.  The generic length and hardware
  * type checks happen here; packets that pass are handed to the
  * protocol-specific input routine selected by ar_pro.  The mbuf is
  * consumed on every path.
  */
 static void
 arpintr(struct mbuf *m)
 {
 	struct arphdr *hdr;
 
 	/* Make the fixed ARP header contiguous. */
 	if (m->m_len < sizeof(struct arphdr)) {
 		m = m_pullup(m, sizeof(struct arphdr));
 		if (m == NULL) {
 			log(LOG_ERR, "arp: runt packet -- m_pullup failed\n");
 			return;
 		}
 	}
 	hdr = mtod(m, struct arphdr *);
 
 	/* Only a fixed set of hardware address formats is accepted. */
 	switch (ntohs(hdr->ar_hrd)) {
 	case ARPHRD_ETHER:
 	case ARPHRD_IEEE802:
 	case ARPHRD_ARCNET:
 	case ARPHRD_IEEE1394:
 		break;
 	default:
 		log(LOG_ERR, "arp: unknown hardware address format (0x%2D)\n",
 		    (unsigned char *)&hdr->ar_hrd, "");
 		m_freem(m);
 		return;
 	}
 
 	/* Now make the variable-length remainder contiguous as well. */
 	if (m->m_len < arphdr_len(hdr)) {
 		m = m_pullup(m, arphdr_len(hdr));
 		if (m == NULL) {
 			log(LOG_ERR, "arp: runt packet\n");
 			m_freem(m);
 			return;
 		}
 		hdr = mtod(m, struct arphdr *);
 	}
 
 	switch (ntohs(hdr->ar_pro)) {
 #ifdef INET
 	case ETHERTYPE_IP:
 		in_arpinput(m);
 		return;
 #endif
 	}
 	/* No handler for this protocol type: drop the packet. */
 	m_freem(m);
 }
 
 #ifdef INET
 /*
  * ARP for Internet protocols on 10 Mb/s Ethernet.
  * Algorithm is that given in RFC 826.
  * In addition, a sanity check is performed on the sender
  * protocol address, to catch impersonators.
  * We no longer handle negotiations for use of trailer protocol:
  * Formerly, ARP replied for protocol type ETHERTYPE_TRAIL sent
  * along with IP replies if we wanted trailers sent to us,
  * and also sent them in response to IP replies.
  * This allowed either end to announce the desire to receive
  * trailer packets.
  * We no longer reply to requests for ETHERTYPE_TRAIL protocol either,
  * but formerly didn't normally send requests.
  */
 static int log_arp_wrong_iface = 1;
 static int log_arp_movements = 1;
 static int log_arp_permanent_modify = 1;
 
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_wrong_iface, CTLFLAG_RW,
 	&log_arp_wrong_iface, 0,
 	"log arp packets arriving on the wrong interface");
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_movements, CTLFLAG_RW,
         &log_arp_movements, 0,
         "log arp replies from MACs different than the one in the cache");
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_permanent_modify, CTLFLAG_RW,
         &log_arp_permanent_modify, 0,
         "log arp replies from MACs different than the one in the permanent arp entry");
 
 
 static void
 in_arpinput(struct mbuf *m)
 {
 	struct arphdr *ah;
 	struct ifnet *ifp = m->m_pkthdr.rcvif;
 	struct llentry *la = NULL;
 	struct rtentry *rt;
 	struct ifaddr *ifa;
 	struct in_ifaddr *ia;
 	struct sockaddr sa;
 	struct in_addr isaddr, itaddr, myaddr;
 	u_int8_t *enaddr = NULL;
 	int op, flags;
 	struct mbuf *m0;
 	int req_len;
 	int bridged = 0, is_bridge = 0;
 #ifdef DEV_CARP
 	int carp_match = 0;
 #endif
 	struct sockaddr_in sin;
 	sin.sin_len = sizeof(struct sockaddr_in);
 	sin.sin_family = AF_INET;
 	sin.sin_addr.s_addr = 0;
 	INIT_VNET_INET(ifp->if_vnet);
 
 	if (ifp->if_bridge)
 		bridged = 1;
 	if (ifp->if_type == IFT_BRIDGE)
 		is_bridge = 1;
 
 	req_len = arphdr_len2(ifp->if_addrlen, sizeof(struct in_addr));
 	if (m->m_len < req_len && (m = m_pullup(m, req_len)) == NULL) {
 		log(LOG_ERR, "in_arp: runt packet -- m_pullup failed\n");
 		return;
 	}
 
 	ah = mtod(m, struct arphdr *);
 	op = ntohs(ah->ar_op);
 	(void)memcpy(&isaddr, ar_spa(ah), sizeof (isaddr));
 	(void)memcpy(&itaddr, ar_tpa(ah), sizeof (itaddr));
 
 	/*
 	 * For a bridge, we want to check the address irrespective
 	 * of the receive interface. (This will change slightly
 	 * when we have clusters of interfaces).
 	 * If the interface does not match, but the receiving interface
 	 * is part of carp, we call carp_iamatch to see if this is a
 	 * request for the virtual host ip.
 	 * XXX: This is really ugly!
 	 */
 	LIST_FOREACH(ia, INADDR_HASH(itaddr.s_addr), ia_hash) {
 		if (((bridged && ia->ia_ifp->if_bridge != NULL) ||
 		    ia->ia_ifp == ifp) &&
 		    itaddr.s_addr == ia->ia_addr.sin_addr.s_addr)
 			goto match;
 #ifdef DEV_CARP
 		if (ifp->if_carp != NULL &&
 		    carp_iamatch(ifp->if_carp, ia, &isaddr, &enaddr) &&
 		    itaddr.s_addr == ia->ia_addr.sin_addr.s_addr) {
 			carp_match = 1;
 			goto match;
 		}
 #endif
 	}
 	LIST_FOREACH(ia, INADDR_HASH(isaddr.s_addr), ia_hash)
 		if (((bridged && ia->ia_ifp->if_bridge != NULL) ||
 		    ia->ia_ifp == ifp) &&
 		    isaddr.s_addr == ia->ia_addr.sin_addr.s_addr)
 			goto match;
 
 #define BDG_MEMBER_MATCHES_ARP(addr, ifp, ia)				\
   (ia->ia_ifp->if_bridge == ifp->if_softc &&				\
   !bcmp(IF_LLADDR(ia->ia_ifp), IF_LLADDR(ifp), ifp->if_addrlen) &&	\
   addr == ia->ia_addr.sin_addr.s_addr)
 	/*
 	 * Check the case when bridge shares its MAC address with
 	 * some of its children, so packets are claimed by bridge
 	 * itself (bridge_input() does it first), but they are really
 	 * meant to be destined to the bridge member.
 	 */
 	if (is_bridge) {
 		LIST_FOREACH(ia, INADDR_HASH(itaddr.s_addr), ia_hash) {
 			if (BDG_MEMBER_MATCHES_ARP(itaddr.s_addr, ifp, ia)) {
 				ifp = ia->ia_ifp;
 				goto match;
 			}
 		}
 	}
 #undef BDG_MEMBER_MATCHES_ARP
 
 	/*
 	 * No match, use the first inet address on the receive interface
 	 * as a dummy address for the rest of the function.
 	 */
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
 		if (ifa->ifa_addr->sa_family == AF_INET) {
 			ia = ifatoia(ifa);
 			goto match;
 		}
 	/*
 	 * If bridging, fall back to using any inet address.
 	 */
 	if (!bridged || (ia = TAILQ_FIRST(&V_in_ifaddrhead)) == NULL)
 		goto drop;
 match:
 	if (!enaddr)
 		enaddr = (u_int8_t *)IF_LLADDR(ifp);
 	myaddr = ia->ia_addr.sin_addr;
 	if (!bcmp(ar_sha(ah), enaddr, ifp->if_addrlen))
 		goto drop;	/* it's from me, ignore it. */
 	if (!bcmp(ar_sha(ah), ifp->if_broadcastaddr, ifp->if_addrlen)) {
 		log(LOG_ERR,
 		    "arp: link address is broadcast for IP address %s!\n",
 		    inet_ntoa(isaddr));
 		goto drop;
 	}
 	/*
 	 * Warn if another host is using the same IP address, but only if the
 	 * IP address isn't 0.0.0.0, which is used for DHCP only, in which
 	 * case we suppress the warning to avoid false positive complaints of
 	 * potential misconfiguration.
 	 */
 	if (!bridged && isaddr.s_addr == myaddr.s_addr && myaddr.s_addr != 0) {
 		log(LOG_ERR,
 		   "arp: %*D is using my IP address %s on %s!\n",
 		   ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
 		   inet_ntoa(isaddr), ifp->if_xname);
 		itaddr = myaddr;
 		goto reply;
 	}
 	if (ifp->if_flags & IFF_STATICARP)
 		goto reply;
 
 	bzero(&sin, sizeof(sin));
 	sin.sin_len = sizeof(struct sockaddr_in);
 	sin.sin_family = AF_INET;
 	sin.sin_addr = isaddr;
 	flags = (itaddr.s_addr == myaddr.s_addr) ? LLE_CREATE : 0;
 	flags |= LLE_EXCLUSIVE;
 	IF_AFDATA_LOCK(ifp); 
 	la = lla_lookup(LLTABLE(ifp), flags, (struct sockaddr *)&sin);
 	IF_AFDATA_UNLOCK(ifp);
 	if (la != NULL) {
 		/* the following is not an error when doing bridging */
 		if (!bridged && la->lle_tbl->llt_ifp != ifp
 #ifdef DEV_CARP
 		    && (ifp->if_type != IFT_CARP || !carp_match)
 #endif
 			) {
 			if (log_arp_wrong_iface)
 				log(LOG_ERR, "arp: %s is on %s "
 				    "but got reply from %*D on %s\n",
 				    inet_ntoa(isaddr),
 				    la->lle_tbl->llt_ifp->if_xname,
 				    ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
 				    ifp->if_xname);
 			goto reply;
 		}
 		if ((la->la_flags & LLE_VALID) &&
 		    bcmp(ar_sha(ah), &la->ll_addr, ifp->if_addrlen)) {
 			if (la->la_flags & LLE_STATIC) {
 				log(LOG_ERR,
 				    "arp: %*D attempts to modify permanent "
 				    "entry for %s on %s\n",
 				    ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
 				    inet_ntoa(isaddr), ifp->if_xname);
 				goto reply;
 			}
 			if (log_arp_movements) {
 			        log(LOG_INFO, "arp: %s moved from %*D "
 				    "to %*D on %s\n",
 				    inet_ntoa(isaddr),
 				    ifp->if_addrlen,
 				    (u_char *)&la->ll_addr, ":",
 				    ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
 				    ifp->if_xname);
 			}
 		}
 		    
 		if (ifp->if_addrlen != ah->ar_hln) {
 			log(LOG_WARNING,
 			    "arp from %*D: addr len: new %d, i/f %d (ignored)",
 			    ifp->if_addrlen, (u_char *) ar_sha(ah), ":",
 			    ah->ar_hln, ifp->if_addrlen);
 			goto reply;
 		}
 		(void)memcpy(&la->ll_addr, ar_sha(ah), ifp->if_addrlen);
 		la->la_flags |= LLE_VALID;
 
 		if (!(la->la_flags & LLE_STATIC)) {
 			la->la_expire = time_uptime + V_arpt_keep;
 			callout_reset(&la->la_timer, hz * V_arpt_keep,
 			    arptimer, la);
 		}
 		la->la_asked = 0;
 		la->la_preempt = V_arp_maxtries;
 		if (la->la_hold != NULL) {
 			m0 = la->la_hold;
 			la->la_hold = 0;
 			memcpy(&sa, L3_ADDR(la), sizeof(sa));
 			LLE_WUNLOCK(la);
 			
 			(*ifp->if_output)(ifp, m0, &sa, NULL);
 			return;
 		}
 	}
 reply:
 	if (op != ARPOP_REQUEST)
 		goto drop;
 
 	if (itaddr.s_addr == myaddr.s_addr) {
 		/* Shortcut.. the receiving interface is the target. */
 		(void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
 		(void)memcpy(ar_sha(ah), enaddr, ah->ar_hln);
 	} else {
 		struct llentry *lle = NULL;
 
 		if (!V_arp_proxyall)
 			goto drop;
 
 		sin.sin_addr = itaddr;
 		/* XXX MRT use table 0 for arp reply  */
 		rt = in_rtalloc1((struct sockaddr *)&sin, 0, 0UL, 0);
 		if (!rt)
 			goto drop;
 
 		/*
 		 * Don't send proxies for nodes on the same interface
 		 * as this one came out of, or we'll get into a fight
 		 * over who claims what Ether address.
 		 */
 		if (!rt->rt_ifp || rt->rt_ifp == ifp) {
 			RTFREE_LOCKED(rt);
 			goto drop;
 		}
 		IF_AFDATA_LOCK(rt->rt_ifp); 
 		lle = lla_lookup(LLTABLE(rt->rt_ifp), 0, (struct sockaddr *)&sin);
 		IF_AFDATA_UNLOCK(rt->rt_ifp);
 		RTFREE_LOCKED(rt);
 
 		if (lle != NULL) {
 			(void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
 			(void)memcpy(ar_sha(ah), &lle->ll_addr, ah->ar_hln);
 			LLE_RUNLOCK(lle);
 		} else
 			goto drop;
 
 		/*
 		 * Also check that the node which sent the ARP packet
 		 * is on the the interface we expect it to be on. This
 		 * avoids ARP chaos if an interface is connected to the
 		 * wrong network.
 		 */
 		sin.sin_addr = isaddr;
 
 		/* XXX MRT use table 0 for arp checks */
 		rt = in_rtalloc1((struct sockaddr *)&sin, 0, 0UL, 0);
 		if (!rt)
 			goto drop;
 		if (rt->rt_ifp != ifp) {
 			log(LOG_INFO, "arp_proxy: ignoring request"
 			    " from %s via %s, expecting %s\n",
 			    inet_ntoa(isaddr), ifp->if_xname,
 			    rt->rt_ifp->if_xname);
 			RTFREE_LOCKED(rt);
 			goto drop;
 		}
 		RTFREE_LOCKED(rt);
 
 #ifdef DEBUG_PROXY
 		printf("arp: proxying for %s\n",
 		       inet_ntoa(itaddr));
 #endif
 	}
 
 	if (la != NULL)
 		LLE_WUNLOCK(la);
 	if (itaddr.s_addr == myaddr.s_addr &&
 	    IN_LINKLOCAL(ntohl(itaddr.s_addr))) {
 		/* RFC 3927 link-local IPv4; always reply by broadcast. */
 #ifdef DEBUG_LINKLOCAL
 		printf("arp: sending reply for link-local addr %s\n",
 		    inet_ntoa(itaddr));
 #endif
 		m->m_flags |= M_BCAST;
 		m->m_flags &= ~M_MCAST;
 	} else {
 		/* default behaviour; never reply by broadcast. */
 		m->m_flags &= ~(M_BCAST|M_MCAST);
 	}
 	(void)memcpy(ar_tpa(ah), ar_spa(ah), ah->ar_pln);
 	(void)memcpy(ar_spa(ah), &itaddr, ah->ar_pln);
 	ah->ar_op = htons(ARPOP_REPLY);
 	ah->ar_pro = htons(ETHERTYPE_IP); /* let's be sure! */
 	m->m_len = sizeof(*ah) + (2 * ah->ar_pln) + (2 * ah->ar_hln);   
 	m->m_pkthdr.len = m->m_len;   
 	sa.sa_family = AF_ARP;
 	sa.sa_len = 2;
 	(*ifp->if_output)(ifp, m, &sa, NULL);
 	return;
 
 drop:
 	if (la != NULL)
 		LLE_WUNLOCK(la);
 	m_freem(m);
 }
 #endif
 
 void
 arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa)
 {
 	struct llentry *lle;
 
 	if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY) {
 		arprequest(ifp, &IA_SIN(ifa)->sin_addr,
 				&IA_SIN(ifa)->sin_addr, IF_LLADDR(ifp));
 		/* 
 		 * interface address is considered static entry
 		 * because the output of the arp utility shows
 		 * that L2 entry as permanent
 		 */
 		IF_AFDATA_LOCK(ifp);
 		lle = lla_lookup(LLTABLE(ifp), (LLE_CREATE | LLE_IFADDR | LLE_STATIC),
 				 (struct sockaddr *)IA_SIN(ifa));
 		IF_AFDATA_UNLOCK(ifp);
 		if (lle == NULL)
 			log(LOG_INFO, "arp_ifinit: cannot create arp "
 			    "entry for interface address\n");
 		else
 			LLE_RUNLOCK(lle);
 	}
 	ifa->ifa_rtrequest = NULL;
 }
 
 /*
  * Variant of arp_ifinit() that uses the caller-supplied link-layer
  * address "enaddr" for the ARP request and does not touch the
  * link-layer table.  ifa_rtrequest is always cleared.
  */
 void
 arp_ifinit2(struct ifnet *ifp, struct ifaddr *ifa, u_char *enaddr)
 {
 	struct in_addr *self = &IA_SIN(ifa)->sin_addr;
 
 	if (ntohl(self->s_addr) != INADDR_ANY) {
 		arprequest(ifp, self, self, enaddr);
 	}
 	ifa->ifa_rtrequest = NULL;
 }
 
 /*
  * Load the default values of the ARP tunables for the current vnet.
  * The argument is unused; the function always returns 0.
  */
 static int
 arp_iattach(const void *unused __unused)
 {
 	INIT_VNET_INET(curvnet);
 
 	/* Proxying for all hosts is off by default. */
 	V_arp_proxyall = 0;
 	/* Use the loopback interface for local traffic. */
 	V_useloopback = 1;
 	V_arp_maxtries = 5;
 	/* Once resolved, an entry is good for 20 more minutes. */
 	V_arpt_keep = (20*60);
 
 	return (0);
 }
 
 /*
  * One-time ARP initialisation, run through the SYSINIT() below.
  * Sets up the per-vnet defaults and registers the ARP netisr handler.
  */
 static void
 arp_init(void)
 {
 
 	/*
 	 * Without VIMAGE_GLOBALS the vnet module is registered; otherwise
 	 * arp_iattach() is called directly to set the defaults.
 	 */
 #ifndef VIMAGE_GLOBALS
 	vnet_mod_register(&vnet_arp_modinfo);
 #else
 	arp_iattach(NULL);
 #endif
 
 	/* Hook the ARP handler description (arp_nh) into netisr. */
-	netisr2_register(&arp_nh);
+	netisr_register(&arp_nh);
 }
 SYSINIT(arp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, arp_init, 0);
Index: projects/pnet/sys/netinet/igmp.c
===================================================================
--- projects/pnet/sys/netinet/igmp.c	(revision 193105)
+++ projects/pnet/sys/netinet/igmp.c	(revision 193106)
@@ -1,3656 +1,3656 @@
 /*-
  * Copyright (c) 2007-2009 Bruce Simpson.
  * Copyright (c) 1988 Stephen Deering.
  * Copyright (c) 1992, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Stephen Deering of Stanford University.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
  */
 
 /*
  * Internet Group Management Protocol (IGMP) routines.
  * [RFC1112, RFC2236, RFC3376]
  *
  * Written by Steve Deering, Stanford, May 1988.
  * Modified by Rosen Sharma, Stanford, Aug 1994.
  * Modified by Bill Fenner, Xerox PARC, Feb 1995.
  * Modified to fully comply to IGMPv2 by Bill Fenner, Oct 1995.
  * Significantly rewritten for IGMPv3, VIMAGE, and SMP by Bruce Simpson.
  *
  * MULTICAST Revision: 3.5.1.4
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_mac.h"
 #include "opt_route.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/module.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/protosw.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <sys/vimage.h>
 #include <sys/ktr.h>
 #include <sys/condvar.h>
 
 #include <net/if.h>
 #include <net/netisr.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_options.h>
 #include <netinet/igmp.h>
 #include <netinet/igmp_var.h>
 #include <netinet/vinet.h>
 
 #include <machine/in_cksum.h>
 
 #include <security/mac/mac_framework.h>
 
 #ifndef KTR_IGMPV3
 #define KTR_IGMPV3 KTR_INET
 #endif
 
 static struct igmp_ifinfo *
 		igi_alloc_locked(struct ifnet *);
 static void	igi_delete_locked(const struct ifnet *);
 static void	igmp_dispatch_queue(struct ifqueue *, int, const int);
 static void	igmp_fasttimo_vnet(void);
 static void	igmp_final_leave(struct in_multi *, struct igmp_ifinfo *);
 static int	igmp_handle_state_change(struct in_multi *,
 		    struct igmp_ifinfo *);
 static int	igmp_initial_join(struct in_multi *, struct igmp_ifinfo *);
 static int	igmp_input_v1_query(struct ifnet *, const struct ip *);
 static int	igmp_input_v2_query(struct ifnet *, const struct ip *,
 		    const struct igmp *);
 static int	igmp_input_v3_query(struct ifnet *, const struct ip *,
 		    /*const*/ struct igmpv3 *);
 static int	igmp_input_v3_group_query(struct in_multi *,
 		    struct igmp_ifinfo *, int, /*const*/ struct igmpv3 *);
 static int	igmp_input_v1_report(struct ifnet *, /*const*/ struct ip *,
 		    /*const*/ struct igmp *);
 static int	igmp_input_v2_report(struct ifnet *, /*const*/ struct ip *,
 		    /*const*/ struct igmp *);
 static void	igmp_intr(struct mbuf *);
 static int	igmp_isgroupreported(const struct in_addr);
 static struct mbuf *
 		igmp_ra_alloc(void);
 #ifdef KTR
 static char *	igmp_rec_type_to_str(const int);
 #endif
 static void	igmp_set_version(struct igmp_ifinfo *, const int);
 static void	igmp_slowtimo_vnet(void);
 static void	igmp_sysinit(void);
 static int	igmp_v1v2_queue_report(struct in_multi *, const int);
 static void	igmp_v1v2_process_group_timer(struct in_multi *, const int);
 static void	igmp_v1v2_process_querier_timers(struct igmp_ifinfo *);
 static void	igmp_v2_update_group(struct in_multi *, const int);
 static void	igmp_v3_cancel_link_timers(struct igmp_ifinfo *);
 static void	igmp_v3_dispatch_general_query(struct igmp_ifinfo *);
 static struct mbuf *
 		igmp_v3_encap_report(struct ifnet *, struct mbuf *);
 static int	igmp_v3_enqueue_group_record(struct ifqueue *,
 		    struct in_multi *, const int, const int, const int);
 static int	igmp_v3_enqueue_filter_change(struct ifqueue *,
 		    struct in_multi *);
 static void	igmp_v3_process_group_timers(struct igmp_ifinfo *,
 		    struct ifqueue *, struct ifqueue *, struct in_multi *,
 		    const int);
 static int	igmp_v3_merge_state_changes(struct in_multi *,
 		    struct ifqueue *);
 static void	igmp_v3_suppress_group_record(struct in_multi *);
 static int	sysctl_igmp_default_version(SYSCTL_HANDLER_ARGS);
 static int	sysctl_igmp_gsr(SYSCTL_HANDLER_ARGS);
 static int	sysctl_igmp_ifinfo(SYSCTL_HANDLER_ARGS);
 
 static vnet_attach_fn	vnet_igmp_iattach;
 static vnet_detach_fn	vnet_igmp_idetach;
 
 /*
  * netisr handler description for IGMP: mbufs queued under NETISR_IGMP
  * are passed to igmp_intr().
  */
 static const struct netisr_handler igmp_nh = {
 	.nh_proto = NETISR_IGMP,
 	.nh_name = "igmp",
 	.nh_handler = igmp_intr,
 	.nh_policy = NETISR_POLICY_SOURCE,
 	.nh_qlimit = IFQ_MAXLEN,
 };
 
 /*
  * System-wide globals.
  *
  * Unlocked access to these is OK, except for the global IGMP output
  * queue. The IGMP subsystem lock ends up being system-wide for the moment,
  * because all VIMAGEs have to share a global output queue, as netisrs
  * themselves are not virtualized.
  *
  * Locking:
  *  * The permitted lock order is: IN_MULTI_LOCK, IGMP_LOCK, IF_ADDR_LOCK.
  *    Any may be taken independently; if any are held at the same
  *    time, the above lock order must be followed.
  *  * All output is delegated to the netisr.
  *    Now that Giant has been eliminated, the netisr may be inlined.
  *  * IN_MULTI_LOCK covers in_multi.
  *  * IGMP_LOCK covers igmp_ifinfo and any global variables in this file,
  *    including the output queue.
  *  * IF_ADDR_LOCK covers if_multiaddrs, which is used for a variety of
  *    per-link state iterators.
  *  * igmp_ifinfo is valid as long as PF_INET is attached to the interface,
  *    therefore it is not refcounted.
  *    We allow unlocked reads of igmp_ifinfo when accessed via in_multi.
  *
  * Reference counting
  *  * IGMP acquires its own reference every time an in_multi is passed to
  *    it and the group is being joined for the first time.
  *  * IGMP releases its reference(s) on in_multi in a deferred way,
  *    because the operations which process the release run as part of
  *    a loop whose control variables are directly affected by the release
  *    (that, and not recursing on the IF_ADDR_LOCK).
  *
  * VIMAGE: Each in_multi corresponds to an ifp, and each ifp corresponds
  * to a vnet in ifp->if_vnet.
  *
  * SMPng: XXX We may potentially race operations on ifma_protospec.
  * The problem is that we currently lack a clean way of taking the
  * IF_ADDR_LOCK() between the ifnet and in layers w/o recursing,
  * as anything which modifies ifma needs to be covered by that lock.
  * So check for ifma_protospec being NULL before proceeding.
  */
 struct mtx		 igmp_mtx;
 
 struct mbuf		*m_raopt;		 /* Router Alert option */
 MALLOC_DEFINE(M_IGMP, "igmp", "igmp state");
 
 /*
  * VIMAGE-wide globals.
  *
  * The IGMPv3 timers themselves need to run per-image, however,
  * protosw timers run globally (see tcp).
  * An ifnet can only be in one vimage at a time, and the loopback
  * ifnet, loif, is itself virtualized.
  * It would otherwise be possible to seriously hose IGMP state,
  * and create inconsistencies in upstream multicast routing, if you have
  * multiple VIMAGEs running on the same link joining different multicast
  * groups, UNLESS the "primary IP address" is different. This is because
  * IGMP for IPv4 does not force link-local addresses to be used for each
  * node, unlike MLD for IPv6.
  * Obviously the IGMPv3 per-interface state has per-vimage granularity
  * also as a result.
  *
  * FUTURE: Stop using IFP_TO_IA/INADDR_ANY, and use source address selection
  * policy to control the address used by IGMP on the link.
  */
 #ifdef VIMAGE_GLOBALS
 int	 interface_timers_running;	 /* IGMPv3 general query response */
 int	 state_change_timers_running;	 /* IGMPv3 state-change retransmit */
 int	 current_state_timers_running;	 /* IGMPv1/v2 host report;
 					  * IGMPv3 g/sg query response */
 
 LIST_HEAD(, igmp_ifinfo)	 igi_head;
 struct igmpstat			 igmpstat;
 struct timeval			 igmp_gsrdelay;
 
 int	 igmp_recvifkludge;
 int	 igmp_sendra;
 int	 igmp_sendlocal;
 int	 igmp_v1enable;
 int	 igmp_v2enable;
 int	 igmp_legacysupp;
 int	 igmp_default_version;
 #endif /* VIMAGE_GLOBALS */
 
 /*
  * Virtualized sysctls.
  */
 SYSCTL_V_STRUCT(V_NET, vnet_inet, _net_inet_igmp, IGMPCTL_STATS, stats,
     CTLFLAG_RW, igmpstat, igmpstat, "");
 SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, recvifkludge,
     CTLFLAG_RW, igmp_recvifkludge, 0,
     "Rewrite IGMPv1/v2 reports from 0.0.0.0 to contain subnet address");
 SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, sendra,
     CTLFLAG_RW, igmp_sendra, 0,
     "Send IP Router Alert option in IGMPv2/v3 messages");
 SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, sendlocal,
     CTLFLAG_RW, igmp_sendlocal, 0,
     "Send IGMP membership reports for 224.0.0.0/24 groups");
 SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, v1enable,
     CTLFLAG_RW, igmp_v1enable, 0,
     "Enable backwards compatibility with IGMPv1");
 SYSCTL_V_INT(V_NET, vnet_inet,  _net_inet_igmp, OID_AUTO, v2enable,
     CTLFLAG_RW, igmp_v2enable, 0,
     "Enable backwards compatibility with IGMPv2");
 SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, legacysupp,
     CTLFLAG_RW, igmp_legacysupp, 0,
     "Allow v1/v2 reports to suppress v3 group responses");
 SYSCTL_V_PROC(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, default_version,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, igmp_default_version, 0,
     sysctl_igmp_default_version, "I",
     "Default version of IGMP to run on each interface");
 SYSCTL_V_PROC(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, gsrdelay,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, igmp_gsrdelay.tv_sec, 0,
     sysctl_igmp_gsr, "I",
     "Rate limit for IGMPv3 Group-and-Source queries in seconds");
 
 /*
  * Non-virtualized sysctls.
  */
 SYSCTL_NODE(_net_inet_igmp, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_MPSAFE,
     sysctl_igmp_ifinfo, "Per-interface IGMPv3 state");
 
 /*
  * Stash output context in the packet header of a queued mbuf: the
  * interface index goes into flowid and, when VIMAGE is compiled in,
  * the interface's vnet pointer goes into the header field.
  */
 static __inline void
 igmp_save_context(struct mbuf *m, struct ifnet *ifp)
 {
 
 	m->m_pkthdr.flowid = ifp->if_index;
 #ifdef VIMAGE
 	m->m_pkthdr.header = ifp->if_vnet;
 #endif /* VIMAGE */
 }
 
 /*
  * Clear the packet header fields written by igmp_save_context().
  */
 static __inline void
 igmp_scrub_context(struct mbuf *m)
 {
 
 	m->m_pkthdr.flowid = 0;
 	m->m_pkthdr.header = NULL;
 }
 
 #ifdef KTR
 /*
  * Format an IPv4 address given in host byte order.  The result is
  * whatever inet_ntoa() returns for the network-order equivalent.
  */
 static __inline char *
 inet_ntoa_haddr(in_addr_t haddr)
 {
 	struct in_addr netaddr = { .s_addr = htonl(haddr) };
 
 	return (inet_ntoa(netaddr));
 }
 #endif
 
 /*
  * Recover the context saved in a queued IGMP output chain and
  * return the interface index stored in the packet header's flowid.
  *
  * VIMAGE: the (currently disabled) assertion checks that the caller
  * really did CURVNET_SET() to the vnet recorded in the mbuf chain.
  */
 static __inline uint32_t
 igmp_restore_context(struct mbuf *m)
 {
 	uint32_t ifindex;
 
 #ifdef notyet
 #if defined(VIMAGE) && defined(INVARIANTS)
 	KASSERT(curvnet == (m->m_pkthdr.header),
 	    ("%s: called when curvnet was not restored", __func__));
 #endif
 #endif
 	ifindex = m->m_pkthdr.flowid;
 	return (ifindex);
 }
 
 /*
  * Retrieve or set default IGMP version.
  *
  * VIMAGE: Assume curvnet set by caller.
  * SMPng: NOTE: Serialized by IGMP lock.
  */
 static int
 sysctl_igmp_default_version(SYSCTL_HANDLER_ARGS)
 {
 	INIT_VNET_INET(curvnet);
 	int	 error;
 	int	 new;
 
 	error = sysctl_wire_old_buffer(req, sizeof(int));
 	if (error)
 		return (error);
 
 	IGMP_LOCK();
 
 	new = V_igmp_default_version;
 
 	error = sysctl_handle_int(oidp, &new, 0, req);
 	if (error || !req->newptr)
 		goto out_locked;
 
 	if (new < IGMP_VERSION_1 || new > IGMP_VERSION_3) {
 		error = EINVAL;
 		goto out_locked;
 	}
 
 	CTR2(KTR_IGMPV3, "change igmp_default_version from %d to %d",
 	     V_igmp_default_version, new);
 
 	V_igmp_default_version = new;
 
 out_locked:
 	IGMP_UNLOCK();
 	return (error);
 }
 
 /*
  * Retrieve or set threshold between group-source queries in seconds.
  *
  * VIMAGE: Assume curvnet set by caller.
  * SMPng: NOTE: Serialized by IGMP lock.
  */
 static int
 sysctl_igmp_gsr(SYSCTL_HANDLER_ARGS)
 {
 	INIT_VNET_INET(curvnet);
 	int error;
 	int i;
 
 	error = sysctl_wire_old_buffer(req, sizeof(int));
 	if (error)
 		return (error);
 
 	IGMP_LOCK();
 
 	i = V_igmp_gsrdelay.tv_sec;
 
 	error = sysctl_handle_int(oidp, &i, 0, req);
 	if (error || !req->newptr)
 		goto out_locked;
 
 	if (i < -1 || i >= 60) {
 		error = EINVAL;
 		goto out_locked;
 	}
 
 	CTR2(KTR_IGMPV3, "change igmp_gsrdelay from %d to %d",
 	     V_igmp_gsrdelay.tv_sec, i);
 	V_igmp_gsrdelay.tv_sec = i;
 
 out_locked:
 	IGMP_UNLOCK();
 	return (error);
 }
 
 /*
  * Read-only sysctl node that copies out the struct igmp_ifinfo of the
  * interface whose index is given as the single trailing name
  * component.  For use by ifmcstat(8).
  *
  * Returns EPERM on a write attempt, EINVAL when the name does not
  * have exactly one component, and ENOENT when the index is out of
  * range, names no interface, or the interface has no igmp_ifinfo.
  *
  * SMPng: NOTE: Does an unlocked ifindex space read.
  * VIMAGE: Assume curvnet set by caller. The node handler itself
  * is not directly virtualized.
  */
 static int
 sysctl_igmp_ifinfo(SYSCTL_HANDLER_ARGS)
 {
 	INIT_VNET_NET(curvnet);
 	INIT_VNET_INET(curvnet);
 	struct igmp_ifinfo	*igi;
 	struct ifnet		*ifp;
 	int			*name = (int *)arg1;
 	u_int			 namelen = arg2;
 	int			 error;
 
 	if (req->newptr != NULL)
 		return (EPERM);
 	if (namelen != 1)
 		return (EINVAL);
 
 	error = sysctl_wire_old_buffer(req, sizeof(struct igmp_ifinfo));
 	if (error)
 		return (error);
 
 	IN_MULTI_LOCK();
 	IGMP_LOCK();
 
 	/* Everything that fails from here on reports "no such entry". */
 	error = ENOENT;
 
 	ifp = NULL;
 	if (name[0] > 0 && name[0] <= V_if_index)
 		ifp = ifnet_byindex(name[0]);
 
 	if (ifp != NULL) {
 		LIST_FOREACH(igi, &V_igi_head, igi_link) {
 			if (igi->igi_ifp != ifp)
 				continue;
 			error = SYSCTL_OUT(req, igi,
 			    sizeof(struct igmp_ifinfo));
 			break;
 		}
 	}
 
 	IGMP_UNLOCK();
 	IN_MULTI_UNLOCK();
 	return (error);
 }
 
 /*
  * Dispatch pending packet chains from "ifq" using the netisr.
  *
  * Each dequeued chain is handed to netisr_dispatch(NETISR_IGMP, ...).
  * When "loop" is non-zero the chain is first tagged with M_IGMP_LOOP.
  * Dispatching stops when the queue is empty or after "limit" chains.
  * The countdown is tested after the decrement, so a "limit" that is
  * zero or negative on entry is never reached in practice and the whole
  * queue is drained.
  *
  * VIMAGE: Assumes the vnet pointer has been set.
  */
 static void
 igmp_dispatch_queue(struct ifqueue *ifq, int limit, const int loop)
 {
 	struct mbuf *m;
 
 	for (;;) {
 		_IF_DEQUEUE(ifq, m);
 		if (m == NULL)
 			break;
 		/* Trace arguments follow the format: packet, then queue. */
 		CTR3(KTR_IGMPV3, "%s: dispatch %p from %p", __func__, m, ifq);
 		if (loop)
 			m->m_flags |= M_IGMP_LOOP;
 		netisr_dispatch(NETISR_IGMP, m);
 		if (--limit == 0)
 			break;
 	}
 }
 
 /*
  * Decide whether IGMP reports are issued for a given group.
  *
  * Reports are ALWAYS suppressed for ALL-HOSTS (224.0.0.1).
  * While the net.inet.igmp.sendlocal sysctl is 0, reports are also
  * suppressed for every group in the 224.0.0.0/24 link-local scope.
  * Be aware that this may break certain IGMP snooping switches which
  * rely on the old report behaviour.
  *
  * Returns 0 when reports for "addr" must be suppressed and 1 when
  * they should be sent.
  */
 static __inline int
 igmp_isgroupreported(const struct in_addr addr)
 {
 	INIT_VNET_INET(curvnet);
 
 	if (in_allhosts(addr))
 		return (0);
 
 	if (!V_igmp_sendlocal && IN_LOCAL_GROUP(ntohl(addr.s_addr)))
 		return (0);
 
 	return (1);
 }
 
 /*
  * Construct a Router Alert option to use in outgoing packets.
  */
 static struct mbuf *
 igmp_ra_alloc(void)
 {
 	struct mbuf	*m;
 	struct ipoption	*p;
 
 	MGET(m, M_DONTWAIT, MT_DATA);
 	p = mtod(m, struct ipoption *);
 	p->ipopt_dst.s_addr = INADDR_ANY;
 	p->ipopt_list[0] = IPOPT_RA;	/* Router Alert Option */
 	p->ipopt_list[1] = 0x04;	/* 4 bytes long */
 	p->ipopt_list[2] = IPOPT_EOL;	/* End of IP option list */
 	p->ipopt_list[3] = 0x00;	/* pad byte */
 	m->m_len = sizeof(p->ipopt_dst) + p->ipopt_list[1];
 
 	return (m);
 }
 
 /*
  * Attach IGMP when PF_INET is attached to an interface.
  */
 struct igmp_ifinfo *
 igmp_domifattach(struct ifnet *ifp)
 {
 	struct igmp_ifinfo *igi;
 
 	CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)",
 	    __func__, ifp, ifp->if_xname);
 
 	IGMP_LOCK();
 
 	igi = igi_alloc_locked(ifp);
 	if (!(ifp->if_flags & IFF_MULTICAST))
 		igi->igi_flags |= IGIF_SILENT;
 
 	IGMP_UNLOCK();
 
 	return (igi);
 }
 
 /*
  * Allocate and initialise the IGMP state for an interface and link it
  * onto V_igi_head.  The IGMP lock must be held.  Returns NULL when the
  * (non-sleeping) allocation fails.
  *
  * VIMAGE: assume curvnet set by caller.
  */
 static struct igmp_ifinfo *
 igi_alloc_locked(/*const*/ struct ifnet *ifp)
 {
 	INIT_VNET_INET(ifp->if_vnet);
 	struct igmp_ifinfo *newigi;
 
 	IGMP_LOCK_ASSERT();
 
 	newigi = malloc(sizeof(struct igmp_ifinfo), M_IGMP, M_NOWAIT|M_ZERO);
 	if (newigi == NULL)
 		return (NULL);
 
 	/* Identity and protocol version. */
 	newigi->igi_ifp = ifp;
 	newigi->igi_version = V_igmp_default_version;
 	newigi->igi_flags = 0;
 
 	/* Initial protocol variables. */
 	newigi->igi_rv = IGMP_RV_INIT;
 	newigi->igi_qi = IGMP_QI_INIT;
 	newigi->igi_qri = IGMP_QRI_INIT;
 	newigi->igi_uri = IGMP_URI_INIT;
 
 	SLIST_INIT(&newigi->igi_relinmhead);
 
 	/* Bound the number of queued responses to general queries. */
 	IFQ_SET_MAXLEN(&newigi->igi_gq, IGMP_MAX_RESPONSE_PACKETS);
 
 	LIST_INSERT_HEAD(&V_igi_head, newigi, igi_link);
 
 	CTR2(KTR_IGMPV3, "allocate igmp_ifinfo for ifp %p(%s)",
 	     ifp, ifp->if_xname);
 
 	return (newigi);
 }
 
 /*
  * Hook for ifdetach.
  *
  * Only does work when the interface is running IGMPv3: every IPv4
  * group on the interface has its recorded sources cleared, and groups
  * in the IGMP_LEAVING_MEMBER state are queued on igi_relinmhead and
  * then released.
  *
  * NOTE: Some finalization tasks need to run before the protocol domain
  * is detached, but also before the link layer does its cleanup.
  *
  * SMPNG: igmp_ifdetach() needs to take IF_ADDR_LOCK().
  * XXX This is also bitten by unlocked ifma_protospec access.
  */
 void
 igmp_ifdetach(struct ifnet *ifp)
 {
 	struct igmp_ifinfo	*igi;
 	struct ifmultiaddr	*ifma;
 	struct in_multi		*inm, *tinm;
 
 	CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)", __func__, ifp,
 	    ifp->if_xname);
 
 	IGMP_LOCK();
 
 	igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
 	if (igi->igi_version != IGMP_VERSION_3) {
 		/* Nothing to finalize for other versions. */
 		IGMP_UNLOCK();
 		return;
 	}
 
 	IF_ADDR_LOCK(ifp);
 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 		/* Only IPv4 memberships that have IGMP state attached. */
 		if (ifma->ifma_addr->sa_family == AF_INET &&
 		    ifma->ifma_protospec != NULL) {
 			inm = (struct in_multi *)ifma->ifma_protospec;
 			if (inm->inm_state == IGMP_LEAVING_MEMBER) {
 				SLIST_INSERT_HEAD(&igi->igi_relinmhead,
 				    inm, inm_nrele);
 			}
 			inm_clear_recorded(inm);
 		}
 	}
 	IF_ADDR_UNLOCK(ifp);
 
 	/*
 	 * Drop the in_multi reference(s) collected above, now that the
 	 * interface address lock is no longer held.
 	 */
 	SLIST_FOREACH_SAFE(inm, &igi->igi_relinmhead, inm_nrele, tinm) {
 		SLIST_REMOVE_HEAD(&igi->igi_relinmhead, inm_nrele);
 		inm_release_locked(inm);
 	}
 
 	IGMP_UNLOCK();
 }
 
 /*
  * Hook for domifdetach.
  *
  * Frees the interface's IGMP state by calling igi_delete_locked()
  * under the IGMP lock.  The state is located by interface pointer, so
  * ifp->if_afdata is not consulted here.
  */
 void
 igmp_domifdetach(struct ifnet *ifp)
 {
 
 	CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)",
 	    __func__, ifp, ifp->if_xname);
 
 	IGMP_LOCK();
 	igi_delete_locked(ifp);
 	IGMP_UNLOCK();
 }
 
 /*
  * Unlink and free the igmp_ifinfo that belongs to "ifp".  The IGMP
  * lock must be held.  Any queued general query responses are drained
  * first.  Kernels built with INVARIANTS panic when no matching entry
  * is found.
  */
 static void
 igi_delete_locked(const struct ifnet *ifp)
 {
 	INIT_VNET_INET(ifp->if_vnet);
 	struct igmp_ifinfo *igi;
 
 	CTR3(KTR_IGMPV3, "%s: freeing igmp_ifinfo for ifp %p(%s)",
 	    __func__, ifp, ifp->if_xname);
 
 	IGMP_LOCK_ASSERT();
 
 	/* Locate the entry; igi is NULL if the list is exhausted. */
 	LIST_FOREACH(igi, &V_igi_head, igi_link) {
 		if (igi->igi_ifp == ifp)
 			break;
 	}
 
 	if (igi != NULL) {
 		/*
 		 * Free deferred General Query responses.
 		 */
 		_IF_DRAIN(&igi->igi_gq);
 
 		LIST_REMOVE(igi, igi_link);
 
 		KASSERT(SLIST_EMPTY(&igi->igi_relinmhead),
 		    ("%s: there are dangling in_multi references",
 		    __func__));
 
 		free(igi, M_IGMP);
 		return;
 	}
 
 #ifdef INVARIANTS
 	panic("%s: igmp_ifinfo not found for ifp %p\n", __func__,  ifp);
 #endif
 }
 
 /*
  * Process a received IGMPv1 query.
  *
  * The return value tells the caller whether to drop the message
  * (non-zero means drop); this implementation always returns 0.
  *
  * VIMAGE: The curvnet pointer is derived from the input ifp.
  */
 static int
 igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip)
 {
 	INIT_VNET_INET(ifp->if_vnet);
 	struct ifmultiaddr	*ifma;
 	struct igmp_ifinfo	*igi;
 	struct in_multi		*inm;
 
 	/*
 	 * IGMPv1 General Queries SHOULD always be addressed to 224.0.0.1.
 	 * igmp_group is always ignored. Do not drop the message, as a
 	 * userland daemon may wish to see it.
 	 */
 	if (!in_allhosts(ip->ip_dst)) {
 		IGMPSTAT_INC(igps_rcv_badqueries);
 		return (0);
 	}
 
 	IGMPSTAT_INC(igps_rcv_gen_queries);
 
 	IN_MULTI_LOCK();
 	IGMP_LOCK();
 
 	igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
 	KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp));
 
 	if (igi->igi_flags & IGIF_LOOPBACK) {
 		CTR2(KTR_IGMPV3, "ignore v1 query on IGIF_LOOPBACK ifp %p(%s)",
 		    ifp, ifp->if_xname);
 	} else {
 		/*
 		 * Switch to IGMPv1 host compatibility mode.
 		 */
 		igmp_set_version(igi, IGMP_VERSION_1);
 
 		CTR2(KTR_IGMPV3, "process v1 query on ifp %p(%s)", ifp, ifp->if_xname);
 
 		/*
 		 * Arm the report timer of every group record on the
 		 * interface the query arrived on, leaving alone those
 		 * whose timer is already running.
 		 */
 		IF_ADDR_LOCK(ifp);
 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 			if (ifma->ifma_addr->sa_family != AF_INET ||
 			    ifma->ifma_protospec == NULL)
 				continue;
 			inm = (struct in_multi *)ifma->ifma_protospec;
 			if (inm->inm_timer != 0)
 				continue;
 			switch (inm->inm_state) {
 			case IGMP_G_QUERY_PENDING_MEMBER:
 			case IGMP_SG_QUERY_PENDING_MEMBER:
 			case IGMP_REPORTING_MEMBER:
 			case IGMP_IDLE_MEMBER:
 			case IGMP_LAZY_MEMBER:
 			case IGMP_SLEEPING_MEMBER:
 			case IGMP_AWAKENING_MEMBER:
 				inm->inm_state = IGMP_REPORTING_MEMBER;
 				inm->inm_timer = IGMP_RANDOM_DELAY(
 				    IGMP_V1V2_MAX_RI * PR_FASTHZ);
 				V_current_state_timers_running = 1;
 				break;
 			case IGMP_NOT_MEMBER:
 			case IGMP_SILENT_MEMBER:
 			case IGMP_LEAVING_MEMBER:
 				/* These states are left untouched. */
 				break;
 			}
 		}
 		IF_ADDR_UNLOCK(ifp);
 	}
 
 	IGMP_UNLOCK();
 	IN_MULTI_UNLOCK();
 
 	return (0);
 }
 
 /*
  * Process a received IGMPv2 general or group-specific query.
  */
 static int
 igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip,
     const struct igmp *igmp)
 {
 	INIT_VNET_INET(ifp->if_vnet);
 	struct ifmultiaddr	*ifma;
 	struct igmp_ifinfo	*igi;
 	struct in_multi		*inm;
 	uint16_t		 timer;
 
 	/*
 	 * Perform lazy allocation of IGMP link info if required,
 	 * and switch to IGMPv2 host compatibility mode.
 	 */
 	IN_MULTI_LOCK();
 	IGMP_LOCK();
 
 	igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
 	KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp));
 
 	if (igi->igi_flags & IGIF_LOOPBACK) {
 		CTR2(KTR_IGMPV3, "ignore v2 query on IGIF_LOOPBACK ifp %p(%s)",
 		    ifp, ifp->if_xname);
 		goto out_locked;
 	}
 
 	igmp_set_version(igi, IGMP_VERSION_2);
 
 	timer = igmp->igmp_code * PR_FASTHZ / IGMP_TIMER_SCALE;
 	if (timer == 0)
 		timer = 1;
 
 	if (!in_nullhost(igmp->igmp_group)) {
 		/*
 		 * IGMPv2 Group-Specific Query.
 		 * If this is a group-specific IGMPv2 query, we need only
 		 * look up the single group to process it.
 		 */
 		inm = inm_lookup(ifp, igmp->igmp_group);
 		if (inm != NULL) {
 			CTR3(KTR_IGMPV3, "process v2 query %s on ifp %p(%s)",
 			    inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname);
 			igmp_v2_update_group(inm, timer);
 		}
 		IGMPSTAT_INC(igps_rcv_group_queries);
 	} else {
 		/*
 		 * IGMPv2 General Query.
 		 * If this was not sent to the all-hosts group, ignore it.
 		 */
 		if (in_allhosts(ip->ip_dst)) {
 			/*
 			 * For each reporting group joined on this
 			 * interface, kick the report timer.
 			 */
 			CTR2(KTR_IGMPV3,
 			    "process v2 general query on ifp %p(%s)",
 			    ifp, ifp->if_xname);
 
 			IF_ADDR_LOCK(ifp);
 			TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 				if (ifma->ifma_addr->sa_family != AF_INET ||
 				    ifma->ifma_protospec == NULL)
 					continue;
 				inm = (struct in_multi *)ifma->ifma_protospec;
 				igmp_v2_update_group(inm, timer);
 			}
 			IF_ADDR_UNLOCK(ifp);
 		}
 		IGMPSTAT_INC(igps_rcv_gen_queries);
 	}
 
 out_locked:
 	IGMP_UNLOCK();
 	IN_MULTI_UNLOCK();
 
 	return (0);
 }
 
 /*
  * Adjust a group's report timer after an IGMPv2 query was received.
  *
  * A SLEEPING group is merely moved to AWAKENING. A REPORTING group whose
  * timer is already running and is due no later than 'timer' is left
  * alone. Any other reportable group is moved to REPORTING and given a
  * fresh randomized delay bounded by 'timer', so that IGMPv2 reports are
  * jittered rather than sent in a burst.
  *
  * The group timer is shared with IGMPv3 group and group-source query
  * responses, so groups still in one of the QUERY_PENDING states are
  * simply forced back to REPORTING here.
  *
  * NOT_MEMBER, SILENT and LEAVING groups are never touched.
  */
 static void
 igmp_v2_update_group(struct in_multi *inm, const int timer)
 {
 	INIT_VNET_INET(curvnet);
 
 	CTR4(KTR_IGMPV3, "%s: %s/%s timer=%d", __func__,
 	    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname, timer);
 
 	IN_MULTI_LOCK_ASSERT();
 
 	switch (inm->inm_state) {
 	case IGMP_SLEEPING_MEMBER:
 		CTR1(KTR_IGMPV3, "%s: ->AWAKENING", __func__);
 		inm->inm_state = IGMP_AWAKENING_MEMBER;
 		return;
 	case IGMP_REPORTING_MEMBER:
 		if (inm->inm_timer != 0 && inm->inm_timer <= timer) {
 			CTR1(KTR_IGMPV3,
 			    "%s: REPORTING and timer running, skipping.",
 			    __func__);
 			return;
 		}
 		break;
 	case IGMP_SG_QUERY_PENDING_MEMBER:
 	case IGMP_G_QUERY_PENDING_MEMBER:
 	case IGMP_IDLE_MEMBER:
 	case IGMP_LAZY_MEMBER:
 	case IGMP_AWAKENING_MEMBER:
 		break;
 	case IGMP_NOT_MEMBER:
 	case IGMP_SILENT_MEMBER:
 	case IGMP_LEAVING_MEMBER:
 	default:
 		/* Nothing to report for this group. */
 		return;
 	}
 
 	/* (Re)start the report timer with a randomized delay. */
 	CTR1(KTR_IGMPV3, "%s: ->REPORTING", __func__);
 	inm->inm_state = IGMP_REPORTING_MEMBER;
 	inm->inm_timer = IGMP_RANDOM_DELAY(timer);
 	V_current_state_timers_running = 1;
 }
 
 /*
  * Process a received IGMPv3 general, group-specific or
  * group-and-source-specific query.
  * Assumes m has already been pulled up to the full IGMP message length.
  *
  * The robustness variable, query interval and maximum response time
  * carried by the query are stored in the link's igmp_ifinfo before the
  * query is classified. A General Query (group 0.0.0.0) may re-arm the
  * per-link igi_v3_timer; any other query is passed on to
  * igmp_input_v3_group_query() when inm_lookup() finds the group on ifp.
  * Queries received on a link flagged IGIF_LOOPBACK are ignored.
  *
  * Every path in this function currently returns 0.
  */
 static int
 igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip,
     /*const*/ struct igmpv3 *igmpv3)
 {
 	INIT_VNET_INET(ifp->if_vnet);
 	struct igmp_ifinfo	*igi;
 	struct in_multi		*inm;
 	uint32_t		 maxresp, nsrc, qqi;
 	uint16_t		 timer;
 	uint8_t			 qrv;
 
 	CTR2(KTR_IGMPV3, "process v3 query on ifp %p(%s)", ifp, ifp->if_xname);
 
 	/*
 	 * Codes of 128 and above are expanded from a mantissa/exponent
 	 * pair using IGMP_MANT() and IGMP_EXP().
 	 */
 	maxresp = igmpv3->igmp_code;	/* in 1/10ths of a second */
 	if (maxresp >= 128) {
 		maxresp = IGMP_MANT(igmpv3->igmp_code) <<
 			  (IGMP_EXP(igmpv3->igmp_code) + 3);
 	}
 
 	/*
 	 * Robustness must never be less than 2 for on-wire IGMPv3.
 	 * FIXME: Check if ifp has IGIF_LOOPBACK set, as we make
 	 * an exception for interfaces whose IGMPv3 state changes
 	 * are redirected to loopback (e.g. MANET).
 	 */
 	qrv = IGMP_QRV(igmpv3->igmp_misc);
 	if (qrv < 2) {
 		CTR3(KTR_IGMPV3, "%s: clamping qrv %d to %d", __func__,
 		    qrv, IGMP_RV_INIT);
 		qrv = IGMP_RV_INIT;
 	}
 
 	/* The query interval code uses the same encoding as igmp_code. */
 	qqi = igmpv3->igmp_qqi;
 	if (qqi >= 128) {
 		qqi = IGMP_MANT(igmpv3->igmp_qqi) <<
 		     (IGMP_EXP(igmpv3->igmp_qqi) + 3);
 	}
 
 	/* Scale the response time into PR_FASTHZ ticks; never use 0. */
 	timer = maxresp * PR_FASTHZ / IGMP_TIMER_SCALE;
 	if (timer == 0)
 		timer = 1;
 
 	nsrc = ntohs(igmpv3->igmp_numsrc);
 
 	IN_MULTI_LOCK();
 	IGMP_LOCK();
 
 	igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
 	KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp));
 
 	if (igi->igi_flags & IGIF_LOOPBACK) {
 		CTR2(KTR_IGMPV3, "ignore v3 query on IGIF_LOOPBACK ifp %p(%s)",
 		    ifp, ifp->if_xname);
 		goto out_locked;
 	}
 
 	igmp_set_version(igi, IGMP_VERSION_3);
 
 	/*
 	 * Record the querier's parameters for this link. Note that this
 	 * happens before the general-query sanity checks below.
 	 */
 	igi->igi_rv = qrv;
 	igi->igi_qi = qqi;
 	igi->igi_qri = maxresp;
 
 	CTR4(KTR_IGMPV3, "%s: qrv %d qi %d qri %d", __func__, qrv, qqi,
 	    maxresp);
 
 	if (in_nullhost(igmpv3->igmp_group)) {
 		/*
 		 * IGMPv3 General Query.
 		 * Schedule a current-state report on this ifp for
 		 * all groups, possibly containing source lists.
 		 */
 		IGMPSTAT_INC(igps_rcv_gen_queries);
 
 		if (!in_allhosts(ip->ip_dst) || nsrc > 0) {
 			/*
 			 * General Queries SHOULD be directed to 224.0.0.1.
 			 * A general query with a source list has undefined
 			 * behaviour; discard it.
 			 */
 			IGMPSTAT_INC(igps_rcv_badqueries);
 			goto out_locked;
 		}
 
 		CTR2(KTR_IGMPV3, "process v3 general query on ifp %p(%s)",
 		    ifp, ifp->if_xname);
 
 		/*
 		 * If there is a pending General Query response
 		 * scheduled earlier than the selected delay, do
 		 * not schedule any other reports.
 		 * Otherwise, reset the interface timer.
 		 */
 		if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer) {
 			igi->igi_v3_timer = IGMP_RANDOM_DELAY(timer);
 			V_interface_timers_running = 1;
 		}
 	} else {
 		/*
 		 * IGMPv3 Group-specific or Group-and-source-specific Query.
 		 *
 		 * Group-source-specific queries are throttled on
 		 * a per-group basis to defeat denial-of-service attempts.
 		 * Queries for groups we are not a member of on this
 		 * link are simply ignored.
 		 */
 		inm = inm_lookup(ifp, igmpv3->igmp_group);
 		if (inm == NULL)
 			goto out_locked;
 		if (nsrc > 0) {
 			IGMPSTAT_INC(igps_rcv_gsr_queries);
 			/* Rate-limit per group using V_igmp_gsrdelay. */
 			if (!ratecheck(&inm->inm_lastgsrtv,
 			    &V_igmp_gsrdelay)) {
 				CTR1(KTR_IGMPV3, "%s: GS query throttled.",
 				    __func__);
 				IGMPSTAT_INC(igps_drop_gsr_queries);
 				goto out_locked;
 			}
 		} else {
 			IGMPSTAT_INC(igps_rcv_group_queries);
 		}
 		CTR3(KTR_IGMPV3, "process v3 %s query on ifp %p(%s)",
 		     inet_ntoa(igmpv3->igmp_group), ifp, ifp->if_xname);
 		/*
 		 * If there is a pending General Query response
 		 * scheduled sooner than the selected delay, no
 		 * further report need be scheduled.
 		 * Otherwise, prepare to respond to the
 		 * group-specific or group-and-source query.
 		 * The return value of the helper is ignored.
 		 */
 		if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer)
 			igmp_input_v3_group_query(inm, igi, timer, igmpv3);
 	}
 
 out_locked:
 	IGMP_UNLOCK();
 	IN_MULTI_UNLOCK();
 
 	return (0);
 }
 
 /*
  * Process a recieved IGMPv3 group-specific or group-and-source-specific
  * query.
  * Return <0 if any error occured. Currently this is ignored.
  */
 static int
 igmp_input_v3_group_query(struct in_multi *inm, struct igmp_ifinfo *igi,
     int timer, /*const*/ struct igmpv3 *igmpv3)
 {
 	INIT_VNET_INET(curvnet);
 	int			 retval;
 	uint16_t		 nsrc;
 
 	IN_MULTI_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();
 
 	retval = 0;
 
 	switch (inm->inm_state) {
 	case IGMP_NOT_MEMBER:
 	case IGMP_SILENT_MEMBER:
 	case IGMP_SLEEPING_MEMBER:
 	case IGMP_LAZY_MEMBER:
 	case IGMP_AWAKENING_MEMBER:
 	case IGMP_IDLE_MEMBER:
 	case IGMP_LEAVING_MEMBER:
 		return (retval);
 		break;
 	case IGMP_REPORTING_MEMBER:
 	case IGMP_G_QUERY_PENDING_MEMBER:
 	case IGMP_SG_QUERY_PENDING_MEMBER:
 		break;
 	}
 
 	nsrc = ntohs(igmpv3->igmp_numsrc);
 
 	/*
 	 * Deal with group-specific queries upfront.
 	 * If any group query is already pending, purge any recorded
 	 * source-list state if it exists, and schedule a query response
 	 * for this group-specific query.
 	 */
 	if (nsrc == 0) {
 		if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
 		    inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
 			inm_clear_recorded(inm);
 			timer = min(inm->inm_timer, timer);
 		}
 		inm->inm_state = IGMP_G_QUERY_PENDING_MEMBER;
 		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
 		V_current_state_timers_running = 1;
 		return (retval);
 	}
 
 	/*
 	 * Deal with the case where a group-and-source-specific query has
 	 * been received but a group-specific query is already pending.
 	 */
 	if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER) {
 		timer = min(inm->inm_timer, timer);
 		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
 		V_current_state_timers_running = 1;
 		return (retval);
 	}
 
 	/*
 	 * Finally, deal with the case where a group-and-source-specific
 	 * query has been received, where a response to a previous g-s-r
 	 * query exists, or none exists.
 	 * In this case, we need to parse the source-list which the Querier
 	 * has provided us with and check if we have any source list filter
 	 * entries at T1 for these sources. If we do not, there is no need
 	 * schedule a report and the query may be dropped.
 	 * If we do, we must record them and schedule a current-state
 	 * report for those sources.
 	 * FIXME: Handling source lists larger than 1 mbuf requires that
 	 * we pass the mbuf chain pointer down to this function, and use
 	 * m_getptr() to walk the chain.
 	 */
 	if (inm->inm_nsrc > 0) {
 		const struct in_addr	*ap;
 		int			 i, nrecorded;
 
 		ap = (const struct in_addr *)(igmpv3 + 1);
 		nrecorded = 0;
 		for (i = 0; i < nsrc; i++, ap++) {
 			retval = inm_record_source(inm, ap->s_addr);
 			if (retval < 0)
 				break;
 			nrecorded += retval;
 		}
 		if (nrecorded > 0) {
 			CTR1(KTR_IGMPV3,
 			    "%s: schedule response to SG query", __func__);
 			inm->inm_state = IGMP_SG_QUERY_PENDING_MEMBER;
 			inm->inm_timer = IGMP_RANDOM_DELAY(timer);
 			V_current_state_timers_running = 1;
 		}
 	}
 
 	return (retval);
 }
 
 /*
  * Process a received IGMPv1 host membership report.
  *
  * Reports received on an IFF_LOOPBACK interface are ignored. A report
  * whose group is not a multicast address, or which was not sent to the
  * group it reports, is counted as bad and rejected with EINVAL.
  * If we are a member of the reported group on ifp, our own pending
  * report for it is suppressed.
  *
  * Returns 0 on success, or EINVAL for a malformed report.
  *
  * NOTE: 0.0.0.0 workaround breaks const correctness.
  */
 static int
 igmp_input_v1_report(struct ifnet *ifp, /*const*/ struct ip *ip,
     /*const*/ struct igmp *igmp)
 {
 	INIT_VNET_INET(ifp->if_vnet);
 	struct in_ifaddr *ia;
 	struct in_multi *inm;
 
 	IGMPSTAT_INC(igps_rcv_reports);
 
 	if (ifp->if_flags & IFF_LOOPBACK)
 		return (0);
 
 	/*
 	 * The reported group must be a multicast address, and the report
 	 * must have been sent to that very group. Both conditions are
 	 * tested separately; IN_MULTICAST() takes only the group address.
 	 */
 	if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) ||
 	    !in_hosteq(igmp->igmp_group, ip->ip_dst)) {
 		IGMPSTAT_INC(igps_rcv_badreports);
 		return (EINVAL);
 	}
 
 	/*
 	 * RFC 3376, Section 4.2.13, 9.2, 9.3:
 	 * Booting clients may use the source address 0.0.0.0. Some
 	 * IGMP daemons may not know how to use IP_RECVIF to determine
 	 * the interface upon which this message was received.
 	 * Replace 0.0.0.0 with the subnet address if told to do so.
 	 */
 	if (V_igmp_recvifkludge && in_nullhost(ip->ip_src)) {
 		IFP_TO_IA(ifp, ia);
 		if (ia != NULL)
 			ip->ip_src.s_addr = htonl(ia->ia_subnet);
 	}
 
 	CTR3(KTR_IGMPV3, "process v1 report %s on ifp %p(%s)",
 	     inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname);
 
 	/*
 	 * IGMPv1 report suppression.
 	 * If we are a member of this group, and our membership should be
 	 * reported, stop our group timer and transition to the 'lazy' state.
 	 */
 	IN_MULTI_LOCK();
 	inm = inm_lookup(ifp, igmp->igmp_group);
 	if (inm != NULL) {
 		struct igmp_ifinfo *igi;
 
 		igi = inm->inm_igi;
 		if (igi == NULL) {
 			/* Assert on debug kernels; bail out otherwise. */
 			KASSERT(igi != NULL,
 			    ("%s: no igi for ifp %p", __func__, ifp));
 			goto out_locked;
 		}
 
 		IGMPSTAT_INC(igps_rcv_ourreports);
 
 		/*
 		 * If we are in IGMPv3 host mode, do not allow the
 		 * other host's IGMPv1 report to suppress our reports
 		 * unless explicitly configured to do so.
 		 */
 		if (igi->igi_version == IGMP_VERSION_3) {
 			if (V_igmp_legacysupp)
 				igmp_v3_suppress_group_record(inm);
 			goto out_locked;
 		}
 
 		inm->inm_timer = 0;
 
 		switch (inm->inm_state) {
 		case IGMP_NOT_MEMBER:
 		case IGMP_SILENT_MEMBER:
 			break;
 		case IGMP_IDLE_MEMBER:
 		case IGMP_LAZY_MEMBER:
 		case IGMP_AWAKENING_MEMBER:
 			CTR3(KTR_IGMPV3,
 			    "report suppressed for %s on ifp %p(%s)",
 			    inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname);
 			/* FALLTHROUGH */
 		case IGMP_SLEEPING_MEMBER:
 			inm->inm_state = IGMP_SLEEPING_MEMBER;
 			break;
 		case IGMP_REPORTING_MEMBER:
 			CTR3(KTR_IGMPV3,
 			    "report suppressed for %s on ifp %p(%s)",
 			    inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname);
 			/* The resulting state depends on the link's mode. */
 			if (igi->igi_version == IGMP_VERSION_1)
 				inm->inm_state = IGMP_LAZY_MEMBER;
 			else if (igi->igi_version == IGMP_VERSION_2)
 				inm->inm_state = IGMP_SLEEPING_MEMBER;
 			break;
 		case IGMP_G_QUERY_PENDING_MEMBER:
 		case IGMP_SG_QUERY_PENDING_MEMBER:
 		case IGMP_LEAVING_MEMBER:
 			break;
 		}
 	}
 
 out_locked:
 	IN_MULTI_UNLOCK();
 
 	return (0);
 }
 
 /*
  * Process a received IGMPv2 host membership report.
  *
  * NOTE: 0.0.0.0 workaround breaks const correctness.
  */
 static int
 igmp_input_v2_report(struct ifnet *ifp, /*const*/ struct ip *ip,
     /*const*/ struct igmp *igmp)
 {
 	INIT_VNET_INET(ifp->if_vnet);
 	struct in_ifaddr *ia;
 	struct in_multi *inm;
 
 	/*
 	 * Make sure we don't hear our own membership report.  Fast
 	 * leave requires knowing that we are the only member of a
 	 * group.
 	 */
 	IFP_TO_IA(ifp, ia);
 	if (ia != NULL && in_hosteq(ip->ip_src, IA_SIN(ia)->sin_addr))
 		return (0);
 
 	IGMPSTAT_INC(igps_rcv_reports);
 
 	if (ifp->if_flags & IFF_LOOPBACK)
 		return (0);
 
 	if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) ||
 	    !in_hosteq(igmp->igmp_group, ip->ip_dst)) {
 		IGMPSTAT_INC(igps_rcv_badreports);
 		return (EINVAL);
 	}
 
 	/*
 	 * RFC 3376, Section 4.2.13, 9.2, 9.3:
 	 * Booting clients may use the source address 0.0.0.0. Some
 	 * IGMP daemons may not know how to use IP_RECVIF to determine
 	 * the interface upon which this message was received.
 	 * Replace 0.0.0.0 with the subnet address if told to do so.
 	 */
 	if (V_igmp_recvifkludge && in_nullhost(ip->ip_src)) {
 		if (ia != NULL)
 			ip->ip_src.s_addr = htonl(ia->ia_subnet);
 	}
 
 	CTR3(KTR_IGMPV3, "process v2 report %s on ifp %p(%s)",
 	     inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname);
 
 	/*
 	 * IGMPv2 report suppression.
 	 * If we are a member of this group, and our membership should be
 	 * reported, and our group timer is pending or about to be reset,
 	 * stop our group timer by transitioning to the 'lazy' state.
 	 */
 	IN_MULTI_LOCK();
 	inm = inm_lookup(ifp, igmp->igmp_group);
 	if (inm != NULL) {
 		struct igmp_ifinfo *igi;
 
 		igi = inm->inm_igi;
 		KASSERT(igi != NULL, ("%s: no igi for ifp %p", __func__, ifp));
 
 		IGMPSTAT_INC(igps_rcv_ourreports);
 
 		/*
 		 * If we are in IGMPv3 host mode, do not allow the
 		 * other host's IGMPv1 report to suppress our reports
 		 * unless explicitly configured to do so.
 		 */
 		if (igi->igi_version == IGMP_VERSION_3) {
 			if (V_igmp_legacysupp)
 				igmp_v3_suppress_group_record(inm);
 			goto out_locked;
 		}
 
 		inm->inm_timer = 0;
 
 		switch (inm->inm_state) {
 		case IGMP_NOT_MEMBER:
 		case IGMP_SILENT_MEMBER:
 		case IGMP_SLEEPING_MEMBER:
 			break;
 		case IGMP_REPORTING_MEMBER:
 		case IGMP_IDLE_MEMBER:
 		case IGMP_AWAKENING_MEMBER:
 			CTR3(KTR_IGMPV3,
 			    "report suppressed for %s on ifp %p(%s)",
 			    inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname);
 		case IGMP_LAZY_MEMBER:
 			inm->inm_state = IGMP_LAZY_MEMBER;
 			break;
 		case IGMP_G_QUERY_PENDING_MEMBER:
 		case IGMP_SG_QUERY_PENDING_MEMBER:
 		case IGMP_LEAVING_MEMBER:
 			break;
 		}
 	}
 
 out_locked:
 	IN_MULTI_UNLOCK();
 
 	return (0);
 }
 
 void
 igmp_input(struct mbuf *m, int off)
 {
 	int iphlen;
 	struct ifnet *ifp;
 	struct igmp *igmp;
 	struct ip *ip;
 	int igmplen;
 	int minlen;
 	int queryver;
 
 	CTR3(KTR_IGMPV3, "%s: called w/mbuf (%p,%d)", __func__, m, off);
 
 	ifp = m->m_pkthdr.rcvif;
 	INIT_VNET_INET(ifp->if_vnet);
 
 	IGMPSTAT_INC(igps_rcv_total);
 
 	ip = mtod(m, struct ip *);
 	iphlen = off;
 	igmplen = ip->ip_len;
 
 	/*
 	 * Validate lengths.
 	 */
 	if (igmplen < IGMP_MINLEN) {
 		IGMPSTAT_INC(igps_rcv_tooshort);
 		m_freem(m);
 		return;
 	}
 
 	/*
 	 * Always pullup to the minimum size for v1/v2 or v3
 	 * to amortize calls to m_pullup().
 	 */
 	minlen = iphlen;
 	if (igmplen >= IGMP_V3_QUERY_MINLEN)
 		minlen += IGMP_V3_QUERY_MINLEN;
 	else
 		minlen += IGMP_MINLEN;
 	if ((m->m_flags & M_EXT || m->m_len < minlen) &&
 	    (m = m_pullup(m, minlen)) == 0) {
 		IGMPSTAT_INC(igps_rcv_tooshort);
 		return;
 	}
 	ip = mtod(m, struct ip *);
 
 	if (ip->ip_ttl != 1) {
 		IGMPSTAT_INC(igps_rcv_badttl);
 		m_freem(m);
 		return;
 	}
 
 	/*
 	 * Validate checksum.
 	 */
 	m->m_data += iphlen;
 	m->m_len -= iphlen;
 	igmp = mtod(m, struct igmp *);
 	if (in_cksum(m, igmplen)) {
 		IGMPSTAT_INC(igps_rcv_badsum);
 		m_freem(m);
 		return;
 	}
 	m->m_data -= iphlen;
 	m->m_len += iphlen;
 
 	switch (igmp->igmp_type) {
 	case IGMP_HOST_MEMBERSHIP_QUERY:
 		if (igmplen == IGMP_MINLEN) {
 			if (igmp->igmp_code == 0)
 				queryver = IGMP_VERSION_1;
 			else
 				queryver = IGMP_VERSION_2;
 		} else if (igmplen >= IGMP_V3_QUERY_MINLEN) {
 			queryver = IGMP_VERSION_3;
 		} else {
 			IGMPSTAT_INC(igps_rcv_tooshort);
 			m_freem(m);
 			return;
 		}
 
 		switch (queryver) {
 		case IGMP_VERSION_1:
 			IGMPSTAT_INC(igps_rcv_v1v2_queries);
 			if (!V_igmp_v1enable)
 				break;
 			if (igmp_input_v1_query(ifp, ip) != 0) {
 				m_freem(m);
 				return;
 			}
 			break;
 
 		case IGMP_VERSION_2:
 			IGMPSTAT_INC(igps_rcv_v1v2_queries);
 			if (!V_igmp_v2enable)
 				break;
 			if (igmp_input_v2_query(ifp, ip, igmp) != 0) {
 				m_freem(m);
 				return;
 			}
 			break;
 
 		case IGMP_VERSION_3: {
 				struct igmpv3 *igmpv3;
 				uint16_t igmpv3len;
 				uint16_t srclen;
 				int nsrc;
 
 				IGMPSTAT_INC(igps_rcv_v3_queries);
 				igmpv3 = (struct igmpv3 *)igmp;
 				/*
 				 * Validate length based on source count.
 				 */
 				nsrc = ntohs(igmpv3->igmp_numsrc);
 				srclen = sizeof(struct in_addr) * nsrc;
 				if (nsrc * sizeof(in_addr_t) > srclen) {
 					IGMPSTAT_INC(igps_rcv_tooshort);
 					return;
 				}
 				/*
 				 * m_pullup() may modify m, so pullup in
 				 * this scope.
 				 */
 				igmpv3len = iphlen + IGMP_V3_QUERY_MINLEN +
 				    srclen;
 				if ((m->m_flags & M_EXT ||
 				     m->m_len < igmpv3len) &&
 				    (m = m_pullup(m, igmpv3len)) == NULL) {
 					IGMPSTAT_INC(igps_rcv_tooshort);
 					return;
 				}
 				igmpv3 = (struct igmpv3 *)(mtod(m, uint8_t *)
 				    + iphlen);
 				if (igmp_input_v3_query(ifp, ip, igmpv3) != 0) {
 					m_freem(m);
 					return;
 				}
 			}
 			break;
 		}
 		break;
 
 	case IGMP_v1_HOST_MEMBERSHIP_REPORT:
 		if (!V_igmp_v1enable)
 			break;
 		if (igmp_input_v1_report(ifp, ip, igmp) != 0) {
 			m_freem(m);
 			return;
 		}
 		break;
 
 	case IGMP_v2_HOST_MEMBERSHIP_REPORT:
 		if (!V_igmp_v2enable)
 			break;
 		if (!ip_checkrouteralert(m))
 			IGMPSTAT_INC(igps_rcv_nora);
 		if (igmp_input_v2_report(ifp, ip, igmp) != 0) {
 			m_freem(m);
 			return;
 		}
 		break;
 
 	case IGMP_v3_HOST_MEMBERSHIP_REPORT:
 		/*
 		 * Hosts do not need to process IGMPv3 membership reports,
 		 * as report suppression is no longer required.
 		 */
 		if (!ip_checkrouteralert(m))
 			IGMPSTAT_INC(igps_rcv_nora);
 		break;
 
 	default:
 		break;
 	}
 
 	/*
 	 * Pass all valid IGMP packets up to any process(es) listening on a
 	 * raw IGMP socket.
 	 */
 	rip_input(m, off);
 }
 
 
 /*
  * Fast timeout handler (global).
  * VIMAGE: Timeout handlers are expected to service all vimages.
  *
  * Calls igmp_fasttimo_vnet() once for every vnet visited by
  * VNET_FOREACH, with that vnet made current via CURVNET_SET for the
  * duration of the call.
  */
 void
 igmp_fasttimo(void)
 {
 	VNET_ITERATOR_DECL(vnet_iter);
 
 	/* VNET_LIST_RLOCK/RUNLOCK bracket the whole walk. */
 	VNET_LIST_RLOCK();
 	VNET_FOREACH(vnet_iter) {
 		CURVNET_SET(vnet_iter);
 		igmp_fasttimo_vnet();
 		CURVNET_RESTORE();
 	}
 	VNET_LIST_RUNLOCK();
 }
 
 /*
  * Fast timeout handler (per-vnet).
  * Sends are shuffled off to a netisr to deal with Giant.
  *
  * Two passes are made over the per-vnet list of IGMP link records:
  * the first services the IGMPv3 General Query response timer of each
  * link, the second services the per-group report and state-change
  * timers of every IPv4 group joined on each link.
  * The three global 'timers running' flags are cleared before the
  * corresponding pass and set again by whatever still has a timer pending.
  *
  * VIMAGE: Assume caller has set up our curvnet.
  */
 static void
 igmp_fasttimo_vnet(void)
 {
 	INIT_VNET_INET(curvnet);
 	struct ifqueue		 scq;	/* State-change packets */
 	struct ifqueue		 qrq;	/* Query response packets */
 	struct ifnet		*ifp;
 	struct igmp_ifinfo	*igi;
 	struct ifmultiaddr	*ifma, *tifma;
 	struct in_multi		*inm;
 	int			 loop, uri_fasthz;
 
 	loop = 0;
 	uri_fasthz = 0;
 
 	/*
 	 * Quick check to see if any work needs to be done, in order to
 	 * minimize the overhead of fasttimo processing.
 	 * SMPng: XXX Unlocked reads.
 	 */
 	if (!V_current_state_timers_running &&
 	    !V_interface_timers_running &&
 	    !V_state_change_timers_running)
 		return;
 
 	IN_MULTI_LOCK();
 	IGMP_LOCK();
 
 	/*
 	 * IGMPv3 General Query response timer processing.
 	 */
 	if (V_interface_timers_running) {
 		CTR1(KTR_IGMPV3, "%s: interface timers running", __func__);
 
 		/* Re-set below if any link timer is still counting down. */
 		V_interface_timers_running = 0;
 		LIST_FOREACH(igi, &V_igi_head, igi_link) {
 			if (igi->igi_v3_timer == 0) {
 				/* Do nothing. */
 			} else if (--igi->igi_v3_timer == 0) {
 				/* Timer just expired: answer the query. */
 				igmp_v3_dispatch_general_query(igi);
 			} else {
 				V_interface_timers_running = 1;
 			}
 		}
 	}
 
 	if (!V_current_state_timers_running &&
 	    !V_state_change_timers_running)
 		goto out_locked;
 
 	/* The per-group timer routines set these again as needed. */
 	V_current_state_timers_running = 0;
 	V_state_change_timers_running = 0;
 
 	CTR1(KTR_IGMPV3, "%s: state change timers running", __func__);
 
 	/*
 	 * IGMPv1/v2/v3 host report and state-change timer processing.
 	 * Note: Processing a v3 group timer may remove a node.
 	 */
 	LIST_FOREACH(igi, &V_igi_head, igi_link) {
 		ifp = igi->igi_ifp;
 
 		/*
 		 * For IGMPv3 links, set up two empty local queues which
 		 * collect the packets generated while walking the groups.
 		 */
 		if (igi->igi_version == IGMP_VERSION_3) {
 			loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
 			uri_fasthz = IGMP_RANDOM_DELAY(igi->igi_uri *
 			    PR_FASTHZ);
 
 			memset(&qrq, 0, sizeof(struct ifqueue));
 			IFQ_SET_MAXLEN(&qrq, IGMP_MAX_G_GS_PACKETS);
 
 			memset(&scq, 0, sizeof(struct ifqueue));
 			IFQ_SET_MAXLEN(&scq, IGMP_MAX_STATE_CHANGE_PACKETS);
 		}
 
 		IF_ADDR_LOCK(ifp);
 		TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link,
 		    tifma) {
 			/* Skip non-IPv4 entries and those without an inm. */
 			if (ifma->ifma_addr->sa_family != AF_INET ||
 			    ifma->ifma_protospec == NULL)
 				continue;
 			inm = (struct in_multi *)ifma->ifma_protospec;
 			switch (igi->igi_version) {
 			case IGMP_VERSION_1:
 			case IGMP_VERSION_2:
 				igmp_v1v2_process_group_timer(inm,
 				    igi->igi_version);
 				break;
 			case IGMP_VERSION_3:
 				igmp_v3_process_group_timers(igi, &qrq,
 				    &scq, inm, uri_fasthz);
 				break;
 			}
 		}
 		IF_ADDR_UNLOCK(ifp);
 
 		if (igi->igi_version == IGMP_VERSION_3) {
 			struct in_multi		*tinm;
 
 			/* Hand off whatever the group walk queued up. */
 			igmp_dispatch_queue(&qrq, 0, loop);
 			igmp_dispatch_queue(&scq, 0, loop);
 
 			/*
 			 * Free the in_multi reference(s) for this
 			 * IGMP lifecycle.
 			 * These were deferred onto igi_relinmhead by
 			 * igmp_v3_process_group_timers(); they are
 			 * released here, after IF_ADDR_UNLOCK.
 			 */
 			SLIST_FOREACH_SAFE(inm, &igi->igi_relinmhead,
 			    inm_nrele, tinm) {
 				SLIST_REMOVE_HEAD(&igi->igi_relinmhead,
 				    inm_nrele);
 				inm_release_locked(inm);
 			}
 		}
 	}
 
 out_locked:
 	IGMP_UNLOCK();
 	IN_MULTI_UNLOCK();
 }
 
 /*
  * Service the host report timer of one group on an IGMPv1/v2 link.
  *
  * The group timer, if running, is decremented by one tick. While it is
  * still counting down, the global current-state flag is raised again and
  * nothing else happens. When it expires on a group in REPORTING state,
  * the group becomes IDLE and a membership report of the type matching
  * 'version' is queued. All other states are left alone.
  */
 static void
 igmp_v1v2_process_group_timer(struct in_multi *inm, const int version)
 {
 	INIT_VNET_INET(curvnet);
 	int expired, report_type;
 
 	IN_MULTI_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();
 
 	expired = 0;
 	if (inm->inm_timer != 0) {
 		if (--inm->inm_timer != 0) {
 			/* Still pending; keep fasttimo servicing us. */
 			V_current_state_timers_running = 1;
 			return;
 		}
 		expired = 1;
 	}
 
 	/* Only an expired timer on a REPORTING group causes a report. */
 	if (!expired || inm->inm_state != IGMP_REPORTING_MEMBER)
 		return;
 
 	if (version == IGMP_VERSION_2)
 		report_type = IGMP_v2_HOST_MEMBERSHIP_REPORT;
 	else
 		report_type = IGMP_v1_HOST_MEMBERSHIP_REPORT;
 
 	inm->inm_state = IGMP_IDLE_MEMBER;
 	(void)igmp_v1v2_queue_report(inm, report_type);
 }
 
 /*
  * Update a group's timers for IGMPv3.
  * Will update the global pending timer flags.
  * Note: Unlocked read from igi.
  *
  * Both the query response timer (inm_timer) and the state-change
  * retransmission timer (inm_sctimer) are decremented by one tick if
  * running. Query responses are enqueued on qrq, state-change
  * retransmissions are merged onto scq; uri_fasthz is the value used to
  * re-arm inm_sctimer while retransmissions remain.
  */
 static void
 igmp_v3_process_group_timers(struct igmp_ifinfo *igi,
     struct ifqueue *qrq, struct ifqueue *scq,
     struct in_multi *inm, const int uri_fasthz)
 {
 	INIT_VNET_INET(curvnet);
 	int query_response_timer_expired;
 	int state_change_retransmit_timer_expired;
 
 	IN_MULTI_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();
 
 	query_response_timer_expired = 0;
 	state_change_retransmit_timer_expired = 0;
 
 	/*
 	 * During a transition from v1/v2 compatibility mode back to v3,
 	 * a group record in REPORTING state may still have its group
 	 * timer active. This is a no-op in this function; it is easier
 	 * to deal with it here than to complicate the slow-timeout path.
 	 */
 	if (inm->inm_timer == 0) {
 		query_response_timer_expired = 0;
 	} else if (--inm->inm_timer == 0) {
 		query_response_timer_expired = 1;
 	} else {
 		V_current_state_timers_running = 1;
 	}
 
 	/* Same three-way handling for the state-change timer. */
 	if (inm->inm_sctimer == 0) {
 		state_change_retransmit_timer_expired = 0;
 	} else if (--inm->inm_sctimer == 0) {
 		state_change_retransmit_timer_expired = 1;
 	} else {
 		V_state_change_timers_running = 1;
 	}
 
 	/* We are in fasttimo, so be quick about it. */
 	if (!state_change_retransmit_timer_expired &&
 	    !query_response_timer_expired)
 		return;
 
 	switch (inm->inm_state) {
 	case IGMP_NOT_MEMBER:
 	case IGMP_SILENT_MEMBER:
 	case IGMP_SLEEPING_MEMBER:
 	case IGMP_LAZY_MEMBER:
 	case IGMP_AWAKENING_MEMBER:
 	case IGMP_IDLE_MEMBER:
 		break;
 	case IGMP_G_QUERY_PENDING_MEMBER:
 	case IGMP_SG_QUERY_PENDING_MEMBER:
 		/*
 		 * Respond to a previously pending Group-Specific
 		 * or Group-and-Source-Specific query by enqueueing
 		 * the appropriate Current-State report for
 		 * immediate transmission.
 		 */
 		if (query_response_timer_expired) {
 			int retval;
 
 			/* retval is only used for the trace point below. */
 			retval = igmp_v3_enqueue_group_record(qrq, inm, 0, 1,
 			    (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER));
 			CTR2(KTR_IGMPV3, "%s: enqueue record = %d",
 			    __func__, retval);
 			inm->inm_state = IGMP_REPORTING_MEMBER;
 			/* XXX Clear recorded sources for next time. */
 			inm_clear_recorded(inm);
 		}
 		/* FALLTHROUGH */
 	case IGMP_REPORTING_MEMBER:
 	case IGMP_LEAVING_MEMBER:
 		if (state_change_retransmit_timer_expired) {
 			/*
 			 * State-change retransmission timer fired.
 			 * If there are any further pending retransmissions,
 			 * set the global pending state-change flag, and
 			 * reset the timer.
 			 */
 			if (--inm->inm_scrv > 0) {
 				inm->inm_sctimer = uri_fasthz;
 				V_state_change_timers_running = 1;
 			}
 			/*
 			 * Retransmit the previously computed state-change
 			 * report. If there are no further pending
 			 * retransmissions, the mbuf queue will be consumed.
 			 * Update T0 state to T1 as we have now sent
 			 * a state-change.
 			 */
 			(void)igmp_v3_merge_state_changes(inm, scq);
 
 			inm_commit(inm);
 			CTR3(KTR_IGMPV3, "%s: T1 -> T0 for %s/%s", __func__,
 			    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname);
 
 			/*
 			 * If we are leaving the group for good, make sure
 			 * we release IGMP's reference to it.
 			 * This release must be deferred using a SLIST,
 			 * as we are called from a loop which traverses
 			 * the in_ifmultiaddr TAILQ.
 			 */
 			if (inm->inm_state == IGMP_LEAVING_MEMBER &&
 			    inm->inm_scrv == 0) {
 				inm->inm_state = IGMP_NOT_MEMBER;
 				SLIST_INSERT_HEAD(&igi->igi_relinmhead,
 				    inm, inm_nrele);
 			}
 		}
 		break;
 	}
 }
 
 
 /*
  * Suppress a group's pending response to a group or source/group query.
  *
  * Only groups in G_QUERY_PENDING or SG_QUERY_PENDING state are affected:
  * their group timer is stopped, any recorded sources of a pending
  * source/group response are cleared, and the group returns to REPORTING.
  *
  * Do NOT suppress state changes. This leads to IGMPv3 inconsistency.
  * Do NOT update ST1/ST0 as this operation merely suppresses
  * the currently pending group record.
  * Do NOT suppress the response to a general query. It is possible but
  * it would require adding another state or flag.
  */
 static void
 igmp_v3_suppress_group_record(struct in_multi *inm)
 {
 
 	IN_MULTI_LOCK_ASSERT();
 
 	KASSERT(inm->inm_igi->igi_version == IGMP_VERSION_3,
 		("%s: not IGMPv3 mode on link", __func__));
 
 	/*
 	 * Nothing to suppress unless a query response is pending.
 	 * Both inequalities must hold for that to be the case; testing
 	 * them with || would be true for every state.
 	 */
 	if (inm->inm_state != IGMP_G_QUERY_PENDING_MEMBER &&
 	    inm->inm_state != IGMP_SG_QUERY_PENDING_MEMBER)
 		return;
 
 	if (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER)
 		inm_clear_recorded(inm);
 
 	inm->inm_timer = 0;
 	inm->inm_state = IGMP_REPORTING_MEMBER;
 }
 
 /*
  * Note that a querier of the given IGMP version was heard on a link and
  * adjust the link's host compatibility mode, as per Section 7.2.1.
  *
  * For versions 1 and 2 the matching "Older Version Querier Present"
  * timer is (re)armed and the other one is cleared. The link's mode is
  * then derived from whichever of the two timers is running; whenever
  * the mode actually changes, all pending IGMPv3 timers on the link are
  * cancelled. With neither timer running the mode is left untouched.
  */
 static void
 igmp_set_version(struct igmp_ifinfo *igi, const int version)
 {
 	int compat_version, ovqp_timer;
 
 	IGMP_LOCK_ASSERT();
 
 	CTR4(KTR_IGMPV3, "%s: switching to v%d on ifp %p(%s)", __func__,
 	    version, igi->igi_ifp, igi->igi_ifp->if_xname);
 
 	switch (version) {
 	case IGMP_VERSION_1:
 	case IGMP_VERSION_2:
 		/*
 		 * Compute the "Older Version Querier Present" timer as per
 		 * Section 8.12, expressed in PR_SLOWHZ ticks.
 		 */
 		ovqp_timer = igi->igi_rv * igi->igi_qi + igi->igi_qri;
 		ovqp_timer *= PR_SLOWHZ;
 
 		if (version == IGMP_VERSION_1) {
 			igi->igi_v1_timer = ovqp_timer;
 			igi->igi_v2_timer = 0;
 		} else {
 			igi->igi_v1_timer = 0;
 			igi->igi_v2_timer = ovqp_timer;
 		}
 		break;
 	default:
 		/* No querier-present timer for other versions. */
 		break;
 	}
 
 	/* A running v1 timer takes precedence over a running v2 timer. */
 	if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0)
 		compat_version = IGMP_VERSION_2;
 	else if (igi->igi_v1_timer > 0)
 		compat_version = IGMP_VERSION_1;
 	else
 		return;
 
 	if (igi->igi_version != compat_version) {
 		igi->igi_version = compat_version;
 		igmp_v3_cancel_link_timers(igi);
 	}
 }
 
 /*
  * Cancel pending IGMPv3 timers for the given link and all groups
  * joined on it; state-change, general-query, and group-query timers.
  *
  * Must be called with the IN_MULTI and IGMP locks held. The interface
  * address lock is taken internally and is dropped briefly around
  * inm_release_locked() for each group found in the LEAVING state.
  *
  * Returns early, without touching the link or its groups, when none
  * of the global 'timer pending' flags is set.
  */
 static void
 igmp_v3_cancel_link_timers(struct igmp_ifinfo *igi)
 {
 	INIT_VNET_INET(curvnet);
 	struct ifmultiaddr	*ifma;
 	struct ifnet		*ifp;
 	struct in_multi		*inm;
 	int			 leaving;
 
 	CTR3(KTR_IGMPV3, "%s: cancel v3 timers on ifp %p(%s)", __func__,
 	    igi->igi_ifp, igi->igi_ifp->if_xname);
 
 	IN_MULTI_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();
 
 	/*
 	 * Fast-track this potentially expensive operation
 	 * by checking all the global 'timer pending' flags.
 	 */
 	if (!V_interface_timers_running &&
 	    !V_state_change_timers_running &&
 	    !V_current_state_timers_running)
 		return;
 
 	igi->igi_v3_timer = 0;
 
 	ifp = igi->igi_ifp;
 
 	IF_ADDR_LOCK(ifp);
 rescan:
 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 		/*
 		 * Skip non-IPv4 memberships, and records with no in_multi
 		 * attached (presumably already detached by a release;
 		 * dereferencing a NULL protospec below would fault).
 		 */
 		if (ifma->ifma_addr->sa_family != AF_INET ||
 		    ifma->ifma_protospec == NULL)
 			continue;
 		inm = (struct in_multi *)ifma->ifma_protospec;
 		leaving = 0;
 		switch (inm->inm_state) {
 		case IGMP_NOT_MEMBER:
 		case IGMP_SILENT_MEMBER:
 		case IGMP_IDLE_MEMBER:
 		case IGMP_LAZY_MEMBER:
 		case IGMP_SLEEPING_MEMBER:
 		case IGMP_AWAKENING_MEMBER:
 			break;
 		case IGMP_LEAVING_MEMBER:
 			/*
 			 * If we are leaving the group and switching
 			 * IGMP version, we need to release the final
 			 * reference held for issuing the INCLUDE {}.
 			 * The release is deferred until the group has
 			 * been reset below, so that inm is never touched
 			 * after its final reference has been dropped.
 			 */
 			leaving = 1;
 			/* FALLTHROUGH */
 		case IGMP_G_QUERY_PENDING_MEMBER:
 		case IGMP_SG_QUERY_PENDING_MEMBER:
 			inm_clear_recorded(inm);
 			/* FALLTHROUGH */
 		case IGMP_REPORTING_MEMBER:
 			inm->inm_sctimer = 0;
 			inm->inm_timer = 0;
 			inm->inm_state = IGMP_REPORTING_MEMBER;
 			/*
 			 * Free any pending IGMPv3 state-change records.
 			 */
 			_IF_DRAIN(&inm->inm_scq);
 			break;
 		}
 		if (leaving) {
 			/*
 			 * SMPNG: Must drop and re-acquire IF_ADDR_LOCK
 			 * around inm_release_locked(), as it is not
 			 * a recursive mutex.
 			 *
 			 * The membership list may have changed while the
 			 * lock was dropped, and neither inm nor ifma may
 			 * be trusted afterwards, so restart the walk.
 			 * The group just handled is no longer LEAVING, so
 			 * each restart leaves one fewer group to release
 			 * and the scan terminates.
 			 */
 			IF_ADDR_UNLOCK(ifp);
 			inm_release_locked(inm);
 			IF_ADDR_LOCK(ifp);
 			goto rescan;
 		}
 	}
 	IF_ADDR_UNLOCK(ifp);
 }
 
 /*
  * Age the Older Version Querier Present timers of one link and select
  * the IGMP compatibility mode that follows from them.
  * See Section 7.2.1 of RFC 3376.
  *
  * Called with the IGMP lock held, once per link from the per-vnet
  * slow timeout handler.
  */
 static void
 igmp_v1v2_process_querier_timers(struct igmp_ifinfo *igi)
 {
 	INIT_VNET_INET(curvnet);
 
 	IGMP_LOCK_ASSERT();
 
 	if (igi->igi_v1_timer > 0) {
 		/*
 		 * IGMPv1 Querier Present timer running.
 		 *
 		 * Keep counting it down while IGMPv1 is enabled;
 		 * if IGMPv1 was disabled since the last timeout,
 		 * stop the timer and revert to IGMPv3.
 		 */
 		if (V_igmp_v1enable) {
 			--igi->igi_v1_timer;
 		} else {
 			CTR5(KTR_IGMPV3,
 			    "%s: transition from v%d -> v%d on %p(%s)",
 			    __func__, igi->igi_version, IGMP_VERSION_3,
 			    igi->igi_ifp, igi->igi_ifp->if_xname);
 			igi->igi_v1_timer = 0;
 			igi->igi_version = IGMP_VERSION_3;
 		}
 		/* Either way, a running IGMPv2 timer is stopped. */
 		if (igi->igi_v2_timer > 0) {
 			CTR3(KTR_IGMPV3,
 			    "%s: cancel v2 timer on %p(%s)",
 			    __func__, igi->igi_ifp, igi->igi_ifp->if_xname);
 			igi->igi_v2_timer = 0;
 		}
 		return;
 	}
 
 	/* Everything below applies only to an expired IGMPv1 timer. */
 	if (igi->igi_v1_timer != 0)
 		return;
 
 	if (igi->igi_v2_timer > 0) {
 		/*
 		 * IGMPv2 Querier Present timer running.
 		 *
 		 * With IGMPv2 enabled, count it down and make sure the
 		 * link speaks IGMPv2; otherwise stop the timer and
 		 * revert to IGMPv3.
 		 */
 		if (V_igmp_v2enable) {
 			--igi->igi_v2_timer;
 			if (igi->igi_version != IGMP_VERSION_2) {
 				CTR5(KTR_IGMPV3,
 				    "%s: transition from v%d -> v%d on %p(%s)",
 				    __func__, igi->igi_version, IGMP_VERSION_2,
 				    igi->igi_ifp, igi->igi_ifp->if_xname);
 				igi->igi_version = IGMP_VERSION_2;
 			}
 		} else {
 			CTR5(KTR_IGMPV3,
 			    "%s: transition from v%d -> v%d on %p(%s)",
 			    __func__, igi->igi_version, IGMP_VERSION_3,
 			    igi->igi_ifp, igi->igi_ifp->if_xname);
 			igi->igi_v2_timer = 0;
 			igi->igi_version = IGMP_VERSION_3;
 		}
 		return;
 	}
 
 	/*
 	 * Both Querier Present timers have expired: revert to IGMPv3
 	 * unless the link is already there.
 	 */
 	if (igi->igi_v2_timer == 0 && igi->igi_version != IGMP_VERSION_3) {
 		CTR5(KTR_IGMPV3,
 		    "%s: transition from v%d -> v%d on %p(%s)",
 		    __func__, igi->igi_version, IGMP_VERSION_3,
 		    igi->igi_ifp, igi->igi_ifp->if_xname);
 		igi->igi_version = IGMP_VERSION_3;
 	}
 }
 
 /*
  * Global slowtimo handler.
  * VIMAGE: Timeout handlers are expected to service all vimages.
  *
  * Walks the vnet list under its read lock and runs the per-vnet
  * handler, igmp_slowtimo_vnet(), once for each entry.
  */
 void
 igmp_slowtimo(void)
 {
 	VNET_ITERATOR_DECL(vnet_iter);
 
 	VNET_LIST_RLOCK();
 	VNET_FOREACH(vnet_iter) {
 		/* Make this vnet the current one for the duration of the call. */
 		CURVNET_SET(vnet_iter);
 		igmp_slowtimo_vnet();
 		CURVNET_RESTORE();
 	}
 	VNET_LIST_RUNLOCK();
 }
 
 /*
  * Per-vnet slowtimo handler.
  */
 static void
 igmp_slowtimo_vnet(void)
 {
 	INIT_VNET_INET(curvnet);
 	struct igmp_ifinfo *igi;
 
 	IGMP_LOCK();
 
 	LIST_FOREACH(igi, &V_igi_head, igi_link) {
 		igmp_v1v2_process_querier_timers(igi);
 	}
 
 	IGMP_UNLOCK();
 }
 
 /*
  * Dispatch an IGMPv1/v2 host report or leave message.
  * These are always small enough to fit inside a single mbuf.
  */
 static int
 igmp_v1v2_queue_report(struct in_multi *inm, const int type)
 {
 	struct ifnet		*ifp;
 	struct igmp		*igmp;
 	struct ip		*ip;
 	struct mbuf		*m;
 
 	IN_MULTI_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();
 
 	ifp = inm->inm_ifp;
 
 	MGETHDR(m, M_DONTWAIT, MT_DATA);
 	if (m == NULL)
 		return (ENOMEM);
 	MH_ALIGN(m, sizeof(struct ip) + sizeof(struct igmp));
 
 	m->m_pkthdr.len = sizeof(struct ip) + sizeof(struct igmp);
 
 	m->m_data += sizeof(struct ip);
 	m->m_len = sizeof(struct igmp);
 
 	igmp = mtod(m, struct igmp *);
 	igmp->igmp_type = type;
 	igmp->igmp_code = 0;
 	igmp->igmp_group = inm->inm_addr;
 	igmp->igmp_cksum = 0;
 	igmp->igmp_cksum = in_cksum(m, sizeof(struct igmp));
 
 	m->m_data -= sizeof(struct ip);
 	m->m_len += sizeof(struct ip);
 
 	ip = mtod(m, struct ip *);
 	ip->ip_tos = 0;
 	ip->ip_len = sizeof(struct ip) + sizeof(struct igmp);
 	ip->ip_off = 0;
 	ip->ip_p = IPPROTO_IGMP;
 	ip->ip_src.s_addr = INADDR_ANY;
 
 	if (type == IGMP_HOST_LEAVE_MESSAGE)
 		ip->ip_dst.s_addr = htonl(INADDR_ALLRTRS_GROUP);
 	else
 		ip->ip_dst = inm->inm_addr;
 
 	igmp_save_context(m, ifp);
 
 	m->m_flags |= M_IGMPV2;
 	if (inm->inm_igi->igi_flags & IGIF_LOOPBACK)
 		m->m_flags |= M_IGMP_LOOP;
 
 	CTR2(KTR_IGMPV3, "%s: netisr_dispatch(NETISR_IGMP, %p)", __func__, m);
 	netisr_dispatch(NETISR_IGMP, m);
 
 	return (0);
 }
 
 /*
  * Process a state change from the upper layer for the given IPv4 group.
  *
  * Each socket holds a reference on the in_multi in its own ip_moptions.
  * The socket layer will have made the necessary updates to the group
  * state, it is now up to IGMP to issue a state change report if there
  * has been any change between T0 (when the last state-change was issued)
  * and T1 (now).
  *
  * We use the IGMPv3 state machine at group level. The IGMP module
  * however makes the decision as to which IGMP protocol version to speak.
  * A state change *from* INCLUDE {} always means an initial join.
  * A state change *to* INCLUDE {} always means a final leave.
  *
  * FUTURE: If IGIF_V3LITE is enabled for this interface, then we can
  * save ourselves a bunch of work; any exclusive mode groups need not
  * compute source filter lists.
  *
  * VIMAGE: curvnet should have been set by caller, as this routine
  * is called from the socket option handlers.
  *
  * Must be called with the IN_MULTI lock held; the IGMP lock is taken
  * and released internally.
  *
  * Returns 0 if the interface has gone away (nothing can be reported)
  * and after a final leave; otherwise returns the result of
  * igmp_initial_join() or igmp_handle_state_change().
  */
 int
 igmp_change_state(struct in_multi *inm)
 {
 	struct igmp_ifinfo *igi;
 	struct ifnet *ifp;
 	int error;
 
 	IN_MULTI_LOCK_ASSERT();
 
 	error = 0;
 
 	/*
 	 * Try to detect if the upper layer just asked us to change state
 	 * for an interface which has now gone away. There is no link
 	 * left to report on in that case, and dereferencing ifp below
 	 * would fault, so bail out before taking the IGMP lock.
 	 */
 	KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__));
 	ifp = inm->inm_ifma->ifma_ifp;
 	if (ifp == NULL) {
 		CTR1(KTR_IGMPV3, "%s: ifp has gone away", __func__);
 		return (0);
 	}
 
 	/*
 	 * Sanity check that netinet's notion of ifp is the
 	 * same as net's.
 	 */
 	KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__));
 
 	IGMP_LOCK();
 
 	igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
 	KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp));
 
 	/*
 	 * If we detect a state transition to or from MCAST_UNDEFINED
 	 * for this group, then we are starting or finishing an IGMP
 	 * life cycle for this group.
 	 */
 	if (inm->inm_st[1].iss_fmode != inm->inm_st[0].iss_fmode) {
 		CTR3(KTR_IGMPV3, "%s: inm transition %d -> %d", __func__,
 		    inm->inm_st[0].iss_fmode, inm->inm_st[1].iss_fmode);
 		if (inm->inm_st[0].iss_fmode == MCAST_UNDEFINED) {
 			CTR1(KTR_IGMPV3, "%s: initial join", __func__);
 			error = igmp_initial_join(inm, igi);
 			goto out_locked;
 		} else if (inm->inm_st[1].iss_fmode == MCAST_UNDEFINED) {
 			CTR1(KTR_IGMPV3, "%s: final leave", __func__);
 			igmp_final_leave(inm, igi);
 			goto out_locked;
 		}
 	} else {
 		CTR1(KTR_IGMPV3, "%s: filter set change", __func__);
 	}
 
 	/*
 	 * Neither a join nor a leave: either the filter mode flipped
 	 * between two defined modes or only the source list changed.
 	 */
 	error = igmp_handle_state_change(inm, igi);
 
 out_locked:
 	IGMP_UNLOCK();
 	return (error);
 }
 
 /*
  * Start the IGMP life cycle of a group that has just been joined.
  *
  * What happens depends on the link and on the version it speaks:
  *  Silent groups and links generate no IGMP traffic at all.
  *  IGMPv1 and IGMPv2 send a host membership report of the matching
  *  version and arm the group's report timer.
  *  IGMPv3 queues a state-change report describing the initial state
  *  of the membership and arms the state-change timer.
  *
  * Returns 0 on success, otherwise an errno value from the report or
  * record enqueue path.
  */
 static int
 igmp_initial_join(struct in_multi *inm, struct igmp_ifinfo *igi)
 {
 	INIT_VNET_INET(curvnet);
 	struct ifnet		*ifp;
 	struct ifqueue		*scq;
 	int			 commit_now, error, nqueued, silent;
 
 	CTR4(KTR_IGMPV3, "%s: initial join %s on ifp %p(%s)",
 	    __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp,
 	    inm->inm_ifp->if_xname);
 
 	ifp = inm->inm_ifp;
 	commit_now = 1;
 	error = 0;
 
 	IN_MULTI_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();
 
 	KASSERT(igi && igi->igi_ifp == ifp, ("%s: inconsistent ifp", __func__));
 
 	/*
 	 * Groups joined on loopback or marked as 'not reported',
 	 * e.g. 224.0.0.1, enter the IGMP_SILENT_MEMBER state and
 	 * are never reported in any IGMP protocol exchanges.
 	 * A link marked as IGIF_SILENT causes IGMP to be completely
 	 * disabled for the link.
 	 */
 	silent = (ifp->if_flags & IFF_LOOPBACK) ||
 	    (igi->igi_flags & IGIF_SILENT) ||
 	    !igmp_isgroupreported(inm->inm_addr);
 	if (silent) {
 		CTR1(KTR_IGMPV3,
 "%s: not kicking state machine for silent group", __func__);
 		inm->inm_state = IGMP_SILENT_MEMBER;
 		inm->inm_timer = 0;
 		goto done;
 	}
 
 	/*
 	 * All other groups enter the state machine for the version in
 	 * use on this link.
 	 *
 	 * Deal with overlapping in_multi lifecycle first: a group which
 	 * was still LEAVING holds an extra reference, taken to keep it
 	 * around for the final INCLUDE {} enqueue; drop it.
 	 */
 	if (igi->igi_version == IGMP_VERSION_3 &&
 	    inm->inm_state == IGMP_LEAVING_MEMBER)
 		inm_release_locked(inm);
 
 	if (igi->igi_version == IGMP_VERSION_1 ||
 	    igi->igi_version == IGMP_VERSION_2) {
 		inm->inm_state = IGMP_IDLE_MEMBER;
 		if (igi->igi_version == IGMP_VERSION_2)
 			error = igmp_v1v2_queue_report(inm,
 			    IGMP_v2_HOST_MEMBERSHIP_REPORT);
 		else
 			error = igmp_v1v2_queue_report(inm,
 			    IGMP_v1_HOST_MEMBERSHIP_REPORT);
 		/* Only arm the report timer if the report went out. */
 		if (error == 0) {
 			inm->inm_timer = IGMP_RANDOM_DELAY(
 			    IGMP_V1V2_MAX_RI * PR_FASTHZ);
 			V_current_state_timers_running = 1;
 		}
 		goto done;
 	}
 
 	inm->inm_state = IGMP_REPORTING_MEMBER;
 	if (igi->igi_version != IGMP_VERSION_3)
 		goto done;
 
 	/*
 	 * IGMPv3: defer update of T0 to T1, until the first copy
 	 * of the state change has been transmitted.
 	 */
 	commit_now = 0;
 
 	/*
 	 * Immediately enqueue a State-Change Report for this interface,
 	 * freeing any previous reports. The timers are left alone if
 	 * there is nothing to send, or if an error occurred.
 	 */
 	scq = &inm->inm_scq;
 	_IF_DRAIN(scq);
 	nqueued = igmp_v3_enqueue_group_record(scq, inm, 1, 0, 0);
 	CTR2(KTR_IGMPV3, "%s: enqueue record = %d",
 	    __func__, nqueued);
 	if (nqueued <= 0) {
 		error = -nqueued;
 		goto done;
 	}
 
 	/*
 	 * Schedule transmission of the pending state-change report up
 	 * to RV times for this link (once only on a looped-back link).
 	 * The timer will fire at the next igmp_fasttimo (~200ms),
 	 * giving us an opportunity to merge the reports.
 	 */
 	if (igi->igi_flags & IGIF_LOOPBACK) {
 		inm->inm_scrv = 1;
 	} else {
 		KASSERT(igi->igi_rv > 1,
 		   ("%s: invalid robustness %d", __func__,
 		    igi->igi_rv));
 		inm->inm_scrv = igi->igi_rv;
 	}
 	inm->inm_sctimer = 1;
 	V_state_change_timers_running = 1;
 	error = 0;
 
 done:
 	/*
 	 * Only update the T0 state if state change is atomic,
 	 * i.e. we don't need to wait for a timer to fire before we
 	 * can consider the state change to have been communicated.
 	 */
 	if (commit_now) {
 		inm_commit(inm);
 		CTR3(KTR_IGMPV3, "%s: T1 -> T0 for %s/%s", __func__,
 		    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname);
 	}
 
 	return (error);
 }
 
 /*
  * Handle a change of filter state made while the group is already a
  * member, i.e. neither an initial join nor a final leave.
  *
  * Only a reportable group on an IGMPv3 link produces traffic: a
  * state-change record is queued and the state-change timer armed.
  * In every other case the new state is simply committed.
  *
  * Returns 0 on success or when there was nothing to report, otherwise
  * an errno value from the record enqueue path.
  */
 static int
 igmp_handle_state_change(struct in_multi *inm, struct igmp_ifinfo *igi)
 {
 	INIT_VNET_INET(curvnet);
 	struct ifnet		*ifp;
 	int			 nqueued, reportable;
 
 	CTR4(KTR_IGMPV3, "%s: state change for %s on ifp %p(%s)",
 	    __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp,
 	    inm->inm_ifp->if_xname);
 
 	ifp = inm->inm_ifp;
 
 	IN_MULTI_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();
 
 	KASSERT(igi && igi->igi_ifp == ifp, ("%s: inconsistent ifp", __func__));
 
 	reportable = !(ifp->if_flags & IFF_LOOPBACK) &&
 	    !(igi->igi_flags & IGIF_SILENT) &&
 	    igmp_isgroupreported(inm->inm_addr) &&
 	    (igi->igi_version == IGMP_VERSION_3);
 
 	if (!reportable) {
 		if (!igmp_isgroupreported(inm->inm_addr)) {
 			CTR1(KTR_IGMPV3,
 "%s: not kicking state machine for silent group", __func__);
 		}
 		CTR1(KTR_IGMPV3, "%s: nothing to do", __func__);
 		inm_commit(inm);
 		CTR3(KTR_IGMPV3, "%s: T1 -> T0 for %s/%s", __func__,
 		    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname);
 		return (0);
 	}
 
 	/* Any previously queued state-change records are superseded. */
 	_IF_DRAIN(&inm->inm_scq);
 
 	nqueued = igmp_v3_enqueue_group_record(&inm->inm_scq, inm, 1, 0, 0);
 	CTR2(KTR_IGMPV3, "%s: enqueue record = %d", __func__, nqueued);
 	if (nqueued <= 0)
 		return (-nqueued);
 
 	/*
 	 * Record(s) were enqueued: start the state-change report timer
 	 * for this group, with a single transmission on a looped-back
 	 * link and the link's robustness count otherwise.
 	 */
 	if (igi->igi_flags & IGIF_LOOPBACK)
 		inm->inm_scrv = 1;
 	else
 		inm->inm_scrv = igi->igi_rv;
 	inm->inm_sctimer = 1;
 	V_state_change_timers_running = 1;
 
 	return (0);
 }
 
 /*
  * Perform the final leave for an IGMP group.
  *
  * When leaving a group:
  *  IGMPv1 does nothing.
  *  IGMPv2 sends a host leave message, if and only if we are the reporter.
  *  IGMPv3 enqueues a state-change report containing a transition
  *  to INCLUDE {} for immediate transmission.
  */
 static void
 igmp_final_leave(struct in_multi *inm, struct igmp_ifinfo *igi)
 {
 	INIT_VNET_INET(curvnet);
 	int syncstates;
 
 	syncstates = 1;
 
 	CTR4(KTR_IGMPV3, "%s: final leave %s on ifp %p(%s)",
 	    __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp,
 	    inm->inm_ifp->if_xname);
 
 	IN_MULTI_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();
 
 	switch (inm->inm_state) {
 	case IGMP_NOT_MEMBER:
 	case IGMP_SILENT_MEMBER:
 	case IGMP_LEAVING_MEMBER:
 		/* Already leaving or left; do nothing. */
 		CTR1(KTR_IGMPV3,
 "%s: not kicking state machine for silent group", __func__);
 		break;
 	case IGMP_REPORTING_MEMBER:
 	case IGMP_IDLE_MEMBER:
 	case IGMP_G_QUERY_PENDING_MEMBER:
 	case IGMP_SG_QUERY_PENDING_MEMBER:
 		if (igi->igi_version == IGMP_VERSION_2) {
 #ifdef INVARIANTS
 			if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
 			    inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER)
 			panic("%s: IGMPv3 state reached, not IGMPv3 mode",
 			     __func__);
 #endif
 			igmp_v1v2_queue_report(inm, IGMP_HOST_LEAVE_MESSAGE);
 			inm->inm_state = IGMP_NOT_MEMBER;
 		} else if (igi->igi_version == IGMP_VERSION_3) {
 			/*
 			 * Stop group timer and all pending reports.
 			 * Immediately enqueue a state-change report
 			 * TO_IN {} to be sent on the next fast timeout,
 			 * giving us an opportunity to merge reports.
 			 */
 			_IF_DRAIN(&inm->inm_scq);
 			inm->inm_timer = 0;
 			if (igi->igi_flags & IGIF_LOOPBACK) {
 				inm->inm_scrv = 1;
 			} else {
 				inm->inm_scrv = igi->igi_rv;
 			}
 			CTR4(KTR_IGMPV3, "%s: Leaving %s/%s with %d "
 			    "pending retransmissions.", __func__,
 			    inet_ntoa(inm->inm_addr),
 			    inm->inm_ifp->if_xname, inm->inm_scrv);
 			if (inm->inm_scrv == 0) {
 				inm->inm_state = IGMP_NOT_MEMBER;
 				inm->inm_sctimer = 0;
 			} else {
 				int retval;
 
 				inm_acquire_locked(inm);
 
 				retval = igmp_v3_enqueue_group_record(
 				    &inm->inm_scq, inm, 1, 0, 0);
 				KASSERT(retval != 0,
 				    ("%s: enqueue record = %d", __func__,
 				     retval));
 
 				inm->inm_state = IGMP_LEAVING_MEMBER;
 				inm->inm_sctimer = 1;
 				V_state_change_timers_running = 1;
 				syncstates = 0;
 			}
 			break;
 		}
 		break;
 	case IGMP_LAZY_MEMBER:
 	case IGMP_SLEEPING_MEMBER:
 	case IGMP_AWAKENING_MEMBER:
 		/* Our reports are suppressed; do nothing. */
 		break;
 	}
 
 	if (syncstates) {
 		inm_commit(inm);
 		CTR3(KTR_IGMPV3, "%s: T1 -> T0 for %s/%s", __func__,
 		    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname);
 		inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
 		CTR3(KTR_IGMPV3, "%s: T1 now MCAST_UNDEFINED for %s/%s",
 		    __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname);
 	}
 }
 
 /*
  * Enqueue an IGMPv3 group record to the given output queue.
  *
  * XXX This function could do with having the allocation code
  * split out, and the multiple-tree-walks coalesced into a single
  * routine as has been done in igmp_v3_enqueue_filter_change().
  *
  * If is_state_change is zero, a current-state record is appended.
  * If is_state_change is non-zero, a state-change report is appended.
  *
  * If is_group_query is non-zero, an mbuf packet chain is allocated.
  * If is_group_query is zero, and if there is a packet with free space
  * at the tail of the queue, it will be appended to providing there
  * is enough free space.
  * Otherwise a new mbuf packet chain is allocated.
  *
  * If is_source_query is non-zero, each source is checked to see if
  * it was recorded for a Group-Source query, and will be omitted if
  * it is not both in-mode and recorded.
  *
  * The function will attempt to allocate leading space in the packet
  * for the IP/IGMP header to be prepended without fragmenting the chain.
  *
  * A state change which alters only the source list, not the filter
  * mode, is handed to igmp_v3_enqueue_filter_change() and that
  * function's result is returned.
  *
  * If successful the size of all data appended to the queue is returned,
  * otherwise an error code less than zero is returned, or zero if
  * no record(s) were appended. -ENOMEM is returned when the queue is
  * full, when no mbuf can be allocated, or when m_append() fails.
  */
 static int
 igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm,
     const int is_state_change, const int is_group_query,
     const int is_source_query)
 {
 	struct igmp_grouprec	 ig;
 	struct igmp_grouprec	*pig;
 	struct ifnet		*ifp;
 	struct ip_msource	*ims, *nims;
 	struct mbuf		*m0, *m, *md;
 	int			 error, is_filter_list_change;
 	int			 minrec0len, m0srcs, msrcs, nbytes, off;
 	int			 record_has_sources;
 	int			 now;
 	int			 type;
 	in_addr_t		 naddr;
 	uint8_t			 mode;
 
 	IN_MULTI_LOCK_ASSERT();
 
 	error = 0;
 	ifp = inm->inm_ifp;
 	is_filter_list_change = 0;
 	m = NULL;
 	m0 = NULL;
 	m0srcs = 0;
 	msrcs = 0;
 	nbytes = 0;
 	nims = NULL;
 	record_has_sources = 1;
 	pig = NULL;
 	type = IGMP_DO_NOTHING;
 	mode = inm->inm_st[1].iss_fmode;
 
 	/*
 	 * If we did not transition out of ASM mode during t0->t1,
 	 * and there are no source nodes to process, we can skip
 	 * the generation of source records.
 	 */
 	if (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0 &&
 	    inm->inm_nsrc == 0)
 		record_has_sources = 0;
 
 	if (is_state_change) {
 		/*
 		 * Queue a state change record.
 		 * If the mode did not change, and there are non-ASM
 		 * listeners or source filters present,
 		 * we potentially need to issue two records for the group.
 		 * If we are transitioning to MCAST_UNDEFINED, we need
 		 * not send any sources.
 		 * If there are ASM listeners, and there was no filter
 		 * mode transition of any kind, do nothing.
 		 */
 		if (mode != inm->inm_st[0].iss_fmode) {
 			if (mode == MCAST_EXCLUDE) {
 				CTR1(KTR_IGMPV3, "%s: change to EXCLUDE",
 				    __func__);
 				type = IGMP_CHANGE_TO_EXCLUDE_MODE;
 			} else {
 				CTR1(KTR_IGMPV3, "%s: change to INCLUDE",
 				    __func__);
 				type = IGMP_CHANGE_TO_INCLUDE_MODE;
 				if (mode == MCAST_UNDEFINED)
 					record_has_sources = 0;
 			}
 		} else {
 			if (record_has_sources) {
 				is_filter_list_change = 1;
 			} else {
 				type = IGMP_DO_NOTHING;
 			}
 		}
 	} else {
 		/*
 		 * Queue a current state record.
 		 */
 		if (mode == MCAST_EXCLUDE) {
 			type = IGMP_MODE_IS_EXCLUDE;
 		} else if (mode == MCAST_INCLUDE) {
 			type = IGMP_MODE_IS_INCLUDE;
 			KASSERT(inm->inm_st[1].iss_asm == 0,
 			    ("%s: inm %p is INCLUDE but ASM count is %d",
 			     __func__, inm, inm->inm_st[1].iss_asm));
 		}
 	}
 
 	/*
 	 * Generate the filter list changes using a separate function.
 	 */
 	if (is_filter_list_change)
 		return (igmp_v3_enqueue_filter_change(ifq, inm));
 
 	if (type == IGMP_DO_NOTHING) {
 		CTR3(KTR_IGMPV3, "%s: nothing to do for %s/%s",
 		    __func__, inet_ntoa(inm->inm_addr),
 		    inm->inm_ifp->if_xname);
 		return (0);
 	}
 
 	/*
 	 * If any sources are present, we must be able to fit at least
 	 * one in the trailing space of the tail packet's mbuf,
 	 * ideally more.
 	 */
 	minrec0len = sizeof(struct igmp_grouprec);
 	if (record_has_sources)
 		minrec0len += sizeof(in_addr_t);
 
 	CTR4(KTR_IGMPV3, "%s: queueing %s for %s/%s", __func__,
 	    igmp_rec_type_to_str(type), inet_ntoa(inm->inm_addr),
 	    inm->inm_ifp->if_xname);
 
 	/*
 	 * Check if we have a packet in the tail of the queue for this
 	 * group into which the first group record for this group will fit.
 	 * Otherwise allocate a new packet.
 	 * Always allocate leading space for IP+RA_OPT+IGMP+REPORT.
 	 * Note: Group records for G/GSR query responses MUST be sent
 	 * in their own packet.
 	 */
 	m0 = ifq->ifq_tail;
 	if (!is_group_query &&
 	    m0 != NULL &&
 	    (m0->m_pkthdr.PH_vt.vt_nrecs + 1 <= IGMP_V3_REPORT_MAXRECS) &&
 	    (m0->m_pkthdr.len + minrec0len) <
 	     (ifp->if_mtu - IGMP_LEADINGSPACE)) {
 		m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
 			    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
 		m = m0;
 		CTR1(KTR_IGMPV3, "%s: use existing packet", __func__);
 	} else {
 		if (_IF_QFULL(ifq)) {
 			CTR1(KTR_IGMPV3, "%s: outbound queue full", __func__);
 			return (-ENOMEM);
 		}
 		m = NULL;
 		m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
 		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
 		/* A cluster is only tried for plain current-state records. */
 		if (!is_state_change && !is_group_query) {
 			m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
 			if (m)
 				m->m_data += IGMP_LEADINGSPACE;
 		}
 		if (m == NULL) {
 			m = m_gethdr(M_DONTWAIT, MT_DATA);
 			if (m)
 				MH_ALIGN(m, IGMP_LEADINGSPACE);
 		}
 		if (m == NULL)
 			return (-ENOMEM);
 
 		igmp_save_context(m, ifp);
 
 		CTR1(KTR_IGMPV3, "%s: allocated first packet", __func__);
 	}
 
 	/*
 	 * Append group record.
 	 * If we have sources, we don't know how many yet.
 	 */
 	ig.ig_type = type;
 	ig.ig_datalen = 0;
 	ig.ig_numsrc = 0;
 	ig.ig_group = inm->inm_addr;
 	if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
 		/* Only a packet allocated here is ours to free. */
 		if (m != m0)
 			m_freem(m);
 		CTR1(KTR_IGMPV3, "%s: m_append() failed.", __func__);
 		return (-ENOMEM);
 	}
 	nbytes += sizeof(struct igmp_grouprec);
 
 	/*
 	 * Append as many sources as will fit in the first packet.
 	 * If we are appending to a new packet, the chain allocation
 	 * may potentially use clusters; use m_getptr() in this case.
 	 * If we are appending to an existing packet, we need to obtain
 	 * a pointer to the group record after m_append(), in case a new
 	 * mbuf was allocated.
 	 * Only append sources which are in-mode at t1. If we are
 	 * transitioning to MCAST_UNDEFINED state on the group, do not
 	 * include source entries.
 	 * Only report recorded sources in our filter set when responding
 	 * to a group-source query.
 	 */
 	if (record_has_sources) {
 		if (m == m0) {
 			md = m_last(m);
 			pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) +
 			    md->m_len - nbytes);
 		} else {
 			md = m_getptr(m, 0, &off);
 			pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) +
 			    off);
 		}
 		msrcs = 0;
 		RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, nims) {
 			CTR2(KTR_IGMPV3, "%s: visit node %s", __func__,
 			    inet_ntoa_haddr(ims->ims_haddr));
 			now = ims_get_mode(inm, ims, 1);
 			CTR2(KTR_IGMPV3, "%s: node is %d", __func__, now);
 			if ((now != mode) ||
 			    (now == mode && mode == MCAST_UNDEFINED)) {
 				CTR1(KTR_IGMPV3, "%s: skip node", __func__);
 				continue;
 			}
 			if (is_source_query && ims->ims_stp == 0) {
 				CTR1(KTR_IGMPV3, "%s: skip unrecorded node",
 				    __func__);
 				continue;
 			}
 			CTR1(KTR_IGMPV3, "%s: append node", __func__);
 			naddr = htonl(ims->ims_haddr);
 			if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
 				if (m != m0)
 					m_freem(m);
 				CTR1(KTR_IGMPV3, "%s: m_append() failed.",
 				    __func__);
 				return (-ENOMEM);
 			}
 			/*
 			 * The bytes for this source are accounted for
 			 * once, after the loop, from msrcs; adding them
 			 * here as well would count every source twice.
 			 */
 			++msrcs;
 			if (msrcs == m0srcs)
 				break;
 		}
 		CTR2(KTR_IGMPV3, "%s: msrcs is %d this packet", __func__,
 		    msrcs);
 		pig->ig_numsrc = htons(msrcs);
 		nbytes += (msrcs * sizeof(in_addr_t));
 	}
 
 	if (is_source_query && msrcs == 0) {
 		CTR1(KTR_IGMPV3, "%s: no recorded sources to report", __func__);
 		if (m != m0)
 			m_freem(m);
 		return (0);
 	}
 
 	/*
 	 * We are good to go with first packet.
 	 */
 	if (m != m0) {
 		CTR1(KTR_IGMPV3, "%s: enqueueing first packet", __func__);
 		m->m_pkthdr.PH_vt.vt_nrecs = 1;
 		_IF_ENQUEUE(ifq, m);
 	} else
 		m->m_pkthdr.PH_vt.vt_nrecs++;
 
 	/*
 	 * No further work needed if no source list in packet(s).
 	 */
 	if (!record_has_sources)
 		return (nbytes);
 
 	/*
 	 * Whilst sources remain to be announced, we need to allocate
 	 * a new packet and fill out as many sources as will fit.
 	 * Always try for a cluster first.
 	 */
 	while (nims != NULL) {
 		if (_IF_QFULL(ifq)) {
 			CTR1(KTR_IGMPV3, "%s: outbound queue full", __func__);
 			return (-ENOMEM);
 		}
 		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
 		if (m)
 			m->m_data += IGMP_LEADINGSPACE;
 		if (m == NULL) {
 			m = m_gethdr(M_DONTWAIT, MT_DATA);
 			if (m)
 				MH_ALIGN(m, IGMP_LEADINGSPACE);
 		}
 		if (m == NULL)
 			return (-ENOMEM);
 		igmp_save_context(m, ifp);
 		/* The record header will sit at the start of the packet. */
 		md = m_getptr(m, 0, &off);
 		pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) + off);
 		CTR1(KTR_IGMPV3, "%s: allocated next packet", __func__);
 
 		if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
 			if (m != m0)
 				m_freem(m);
 			CTR1(KTR_IGMPV3, "%s: m_append() failed.", __func__);
 			return (-ENOMEM);
 		}
 		m->m_pkthdr.PH_vt.vt_nrecs = 1;
 		nbytes += sizeof(struct igmp_grouprec);
 
 		m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
 		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
 
 		msrcs = 0;
 		/* Resume the tree walk where the previous packet stopped. */
 		RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
 			CTR2(KTR_IGMPV3, "%s: visit node %s", __func__,
 			    inet_ntoa_haddr(ims->ims_haddr));
 			now = ims_get_mode(inm, ims, 1);
 			if ((now != mode) ||
 			    (now == mode && mode == MCAST_UNDEFINED)) {
 				CTR1(KTR_IGMPV3, "%s: skip node", __func__);
 				continue;
 			}
 			if (is_source_query && ims->ims_stp == 0) {
 				CTR1(KTR_IGMPV3, "%s: skip unrecorded node",
 				    __func__);
 				continue;
 			}
 			CTR1(KTR_IGMPV3, "%s: append node", __func__);
 			naddr = htonl(ims->ims_haddr);
 			if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
 				if (m != m0)
 					m_freem(m);
 				CTR1(KTR_IGMPV3, "%s: m_append() failed.",
 				    __func__);
 				return (-ENOMEM);
 			}
 			++msrcs;
 			if (msrcs == m0srcs)
 				break;
 		}
 		pig->ig_numsrc = htons(msrcs);
 		nbytes += (msrcs * sizeof(in_addr_t));
 
 		CTR1(KTR_IGMPV3, "%s: enqueueing next packet", __func__);
 		_IF_ENQUEUE(ifq, m);
 	}
 
 	return (nbytes);
 }
 
 /*
  * Marks which record types a filter-change pass has completed.
  * The values are chosen so that the current filter mode of an
  * ip_msource node can be cast to this type directly, and so that
  * the two record kinds combine as a bit mask.
  */
 typedef enum {
 	/* MCAST_UNDEFINED */
 	REC_NONE = 0x00,
 	/* MCAST_INCLUDE */
 	REC_ALLOW = 0x01,
 	/* MCAST_EXCLUDE */
 	REC_BLOCK = 0x02,
 	/* Both record kinds done. */
 	REC_FULL = (REC_ALLOW | REC_BLOCK)
 } rectype_t;
 
 /*
  * Enqueue an IGMPv3 filter list change to the given output queue.
  *
  * Source list filter state is held in an RB-tree. When the filter list
  * for a group is changed without changing its mode, we need to compute
  * the deltas between T0 and T1 for each source in the filter set,
  * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records.
  *
  * As we may potentially queue two record types, and the entire R-B tree
  * needs to be walked at once, we break this out into its own function
  * so we can generate a tightly packed queue of packets.
  *
  * XXX This could be written to only use one tree walk, although that makes
  * serializing into the mbuf chains a bit harder. For now we do two walks
  * which makes things easier on us, and it may or may not be harder on
  * the L2 cache.
  *
  * If successful the size of all data appended to the queue is returned,
  * otherwise an error code less than zero is returned, or zero if
  * no record(s) were appended.
  */
 static int
 igmp_v3_enqueue_filter_change(struct ifqueue *ifq, struct in_multi *inm)
 {
 	static const int MINRECLEN =
 	    sizeof(struct igmp_grouprec) + sizeof(in_addr_t);
 	struct ifnet		*ifp;
 	struct igmp_grouprec	 ig;
 	struct igmp_grouprec	*pig;
 	struct ip_msource	*ims, *nims;
 	struct mbuf		*m, *m0, *md;
 	in_addr_t		 naddr;
 	int			 m0srcs, nbytes, npbytes, off, rsrcs, schanged;
 	int			 nallow, nblock;
 	uint8_t			 mode, now, then;
 	rectype_t		 crt, drt, nrt;
 
 	IN_MULTI_LOCK_ASSERT();
 
 	if (inm->inm_nsrc == 0 ||
 	    (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0))
 		return (0);
 
 	ifp = inm->inm_ifp;			/* interface */
 	mode = inm->inm_st[1].iss_fmode;	/* filter mode at t1 */
 	crt = REC_NONE;	/* current group record type */
 	drt = REC_NONE;	/* mask of completed group record types */
 	nrt = REC_NONE;	/* record type for current node */
 	m0srcs = 0;	/* # source which will fit in current mbuf chain */
 	nbytes = 0;	/* # of bytes appended to group's state-change queue */
 	npbytes = 0;	/* # of bytes appended this packet */
 	rsrcs = 0;	/* # sources encoded in current record */
 	schanged = 0;	/* # nodes encoded in overall filter change */
 	nallow = 0;	/* # of source entries in ALLOW_NEW */
 	nblock = 0;	/* # of source entries in BLOCK_OLD */
 	nims = NULL;	/* next tree node pointer */
 
 	/*
 	 * For each possible filter record mode.
 	 * The first kind of source we encounter tells us which
 	 * is the first kind of record we start appending.
 	 * If a node transitioned to UNDEFINED at t1, its mode is treated
 	 * as the inverse of the group's filter mode.
 	 */
 	while (drt != REC_FULL) {
 		do {
 			m0 = ifq->ifq_tail;
 			if (m0 != NULL &&
 			    (m0->m_pkthdr.PH_vt.vt_nrecs + 1 <=
 			     IGMP_V3_REPORT_MAXRECS) &&
 			    (m0->m_pkthdr.len + MINRECLEN) <
 			     (ifp->if_mtu - IGMP_LEADINGSPACE)) {
 				m = m0;
 				m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
 					    sizeof(struct igmp_grouprec)) /
 				    sizeof(in_addr_t);
 				CTR1(KTR_IGMPV3,
 				    "%s: use previous packet", __func__);
 			} else {
 				m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
 				if (m)
 					m->m_data += IGMP_LEADINGSPACE;
 				if (m == NULL) {
 					m = m_gethdr(M_DONTWAIT, MT_DATA);
 					if (m)
 						MH_ALIGN(m, IGMP_LEADINGSPACE);
 				}
 				if (m == NULL) {
 					CTR1(KTR_IGMPV3,
 					    "%s: m_get*() failed", __func__);
 					return (-ENOMEM);
 				}
 				m->m_pkthdr.PH_vt.vt_nrecs = 0;
 				igmp_save_context(m, ifp);
 				m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
 				    sizeof(struct igmp_grouprec)) /
 				    sizeof(in_addr_t);
 				npbytes = 0;
 				CTR1(KTR_IGMPV3,
 				    "%s: allocated new packet", __func__);
 			}
 			/*
 			 * Append the IGMP group record header to the
 			 * current packet's data area.
 			 * Recalculate pointer to free space for next
 			 * group record, in case m_append() allocated
 			 * a new mbuf or cluster.
 			 */
 			memset(&ig, 0, sizeof(ig));
 			ig.ig_group = inm->inm_addr;
 			if (!m_append(m, sizeof(ig), (void *)&ig)) {
 				if (m != m0)
 					m_freem(m);
 				CTR1(KTR_IGMPV3,
 				    "%s: m_append() failed", __func__);
 				return (-ENOMEM);
 			}
 			npbytes += sizeof(struct igmp_grouprec);
 			if (m != m0) {
 				/* new packet; offset in chain */
 				md = m_getptr(m, npbytes -
 				    sizeof(struct igmp_grouprec), &off);
 				pig = (struct igmp_grouprec *)(mtod(md,
 				    uint8_t *) + off);
 			} else {
 				/* current packet; offset from last append */
 				md = m_last(m);
 				pig = (struct igmp_grouprec *)(mtod(md,
 				    uint8_t *) + md->m_len -
 				    sizeof(struct igmp_grouprec));
 			}
 			/*
 			 * Begin walking the tree for this record type
 			 * pass, or continue from where we left off
 			 * previously if we had to allocate a new packet.
 			 * Only report deltas in-mode at t1.
 			 * We need not report included sources as allowed
 			 * if we are in inclusive mode on the group,
 			 * however the converse is not true.
 			 */
 			rsrcs = 0;
 			if (nims == NULL)
 				nims = RB_MIN(ip_msource_tree, &inm->inm_srcs);
 			RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
 				CTR2(KTR_IGMPV3, "%s: visit node %s",
 				    __func__, inet_ntoa_haddr(ims->ims_haddr));
 				now = ims_get_mode(inm, ims, 1);
 				then = ims_get_mode(inm, ims, 0);
 				CTR3(KTR_IGMPV3, "%s: mode: t0 %d, t1 %d",
 				    __func__, then, now);
 				if (now == then) {
 					CTR1(KTR_IGMPV3,
 					    "%s: skip unchanged", __func__);
 					continue;
 				}
 				if (mode == MCAST_EXCLUDE &&
 				    now == MCAST_INCLUDE) {
 					CTR1(KTR_IGMPV3,
 					    "%s: skip IN src on EX group",
 					    __func__);
 					continue;
 				}
 				nrt = (rectype_t)now;
 				if (nrt == REC_NONE)
 					nrt = (rectype_t)(~mode & REC_FULL);
 				if (schanged++ == 0) {
 					crt = nrt;
 				} else if (crt != nrt)
 					continue;
 				naddr = htonl(ims->ims_haddr);
 				if (!m_append(m, sizeof(in_addr_t),
 				    (void *)&naddr)) {
 					if (m != m0)
 						m_freem(m);
 					CTR1(KTR_IGMPV3,
 					    "%s: m_append() failed", __func__);
 					return (-ENOMEM);
 				}
 				nallow += !!(crt == REC_ALLOW);
 				nblock += !!(crt == REC_BLOCK);
 				if (++rsrcs == m0srcs)
 					break;
 			}
 			/*
 			 * If we did not append any tree nodes on this
 			 * pass, back out of allocations.
 			 */
 			if (rsrcs == 0) {
 				npbytes -= sizeof(struct igmp_grouprec);
 				if (m != m0) {
 					CTR1(KTR_IGMPV3,
 					    "%s: m_free(m)", __func__);
 					m_freem(m);
 				} else {
 					CTR1(KTR_IGMPV3,
 					    "%s: m_adj(m, -ig)", __func__);
 					m_adj(m, -((int)sizeof(
 					    struct igmp_grouprec)));
 				}
 				continue;
 			}
 			npbytes += (rsrcs * sizeof(in_addr_t));
 			if (crt == REC_ALLOW)
 				pig->ig_type = IGMP_ALLOW_NEW_SOURCES;
 			else if (crt == REC_BLOCK)
 				pig->ig_type = IGMP_BLOCK_OLD_SOURCES;
 			pig->ig_numsrc = htons(rsrcs);
 			/*
 			 * Count the new group record, and enqueue this
 			 * packet if it wasn't already queued.
 			 */
 			m->m_pkthdr.PH_vt.vt_nrecs++;
 			if (m != m0)
 				_IF_ENQUEUE(ifq, m);
 			nbytes += npbytes;
 		} while (nims != NULL);
 		drt |= crt;
 		crt = (~crt & REC_FULL);
 	}
 
 	CTR3(KTR_IGMPV3, "%s: queued %d ALLOW_NEW, %d BLOCK_OLD", __func__,
 	    nallow, nblock);
 
 	return (nbytes);
 }
 
 /*
  * Transfer the pending state-change messages of a group onto an
  * interface-wide queue.
  *
  * Each packet on inm->inm_scq is either appended as a new packet to ifscq,
  * or, when the packet at the tail of ifscq has room for it, concatenated
  * onto that tail packet.
  *
  * If inm->inm_scrv is greater than zero the packets are duplicated with
  * m_dup() and the originals stay on inm->inm_scq; otherwise the originals
  * themselves are taken off the queue and handed over.
  *
  * Returns 0 on success, or ENOMEM if m_dup() fails.
  */
 static int
 igmp_v3_merge_state_changes(struct in_multi *inm, struct ifqueue *ifscq)
 {
 	struct ifqueue	*gq;
 	struct mbuf	*m;		/* pending state-change */
 	struct mbuf	*m0;		/* copy of pending state-change */
 	struct mbuf	*mt;		/* last state-change in packet */
 	int		 docopy, domerge;
 	u_int		 recslen;
 
 	docopy = 0;
 	domerge = 0;
 	recslen = 0;
 
 	IN_MULTI_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();
 
 	/*
 	 * If there are further pending retransmissions, make a writable
 	 * copy of each queued state-change message before merging.
 	 */
 	if (inm->inm_scrv > 0)
 		docopy = 1;
 
 	gq = &inm->inm_scq;
 #ifdef KTR
 	if (gq->ifq_head == NULL) {
 		CTR2(KTR_IGMPV3, "%s: WARNING: queue for inm %p is empty",
 		    __func__, inm);
 	}
 #endif
 
 	m = gq->ifq_head;
 	while (m != NULL) {
 		/*
 		 * Only merge the report into the current packet if
 		 * there is sufficient space to do so; an IGMPv3 report
 		 * packet may only contain 65,535 group records.
 		 * Always use a simple mbuf chain concatenation to do this,
 		 * as large state changes for single groups may have
 		 * allocated clusters.
 		 */
 		domerge = 0;
 		mt = ifscq->ifq_tail;
 		if (mt != NULL) {
 			recslen = m_length(m, NULL);
 
 			/*
 			 * Both limits must hold: the combined record count
 			 * and the combined length within the interface MTU
 			 * less the reserved leading space.
 			 */
 			if ((mt->m_pkthdr.PH_vt.vt_nrecs +
 			    m->m_pkthdr.PH_vt.vt_nrecs <=
 			    IGMP_V3_REPORT_MAXRECS) &&
 			    (mt->m_pkthdr.len + recslen <=
 			    (inm->inm_ifp->if_mtu - IGMP_LEADINGSPACE)))
 				domerge = 1;
 		}
 
 		/*
 		 * NOTE(review): the fullness test is applied to gq (the queue
 		 * being drained), not to ifscq (the queue being filled), and
 		 * in the !docopy case m is freed while still linked on gq.
 		 * Confirm both are intended.
 		 */
 		if (!domerge && _IF_QFULL(gq)) {
 			CTR2(KTR_IGMPV3,
 			    "%s: outbound queue full, skipping whole packet %p",
 			    __func__, m);
 			/* mt is reused here as a scratch "next packet" pointer. */
 			mt = m->m_nextpkt;
 			if (!docopy)
 				m_freem(m);
 			m = mt;
 			continue;
 		}
 
 		if (!docopy) {
 			/* Hand over the original; advance via its old link. */
 			CTR2(KTR_IGMPV3, "%s: dequeueing %p", __func__, m);
 			_IF_DEQUEUE(gq, m0);
 			m = m0->m_nextpkt;
 		} else {
 			/* Leave the original queued and work on a deep copy. */
 			CTR2(KTR_IGMPV3, "%s: copying %p", __func__, m);
 			m0 = m_dup(m, M_NOWAIT);
 			if (m0 == NULL)
 				return (ENOMEM);
 			m0->m_nextpkt = NULL;
 			m = m->m_nextpkt;
 		}
 
 		if (!domerge) {
 			CTR3(KTR_IGMPV3, "%s: queueing %p to ifscq %p)",
 			    __func__, m0, ifscq);
 			_IF_ENQUEUE(ifscq, m0);
 		} else {
 			struct mbuf *mtl;	/* last mbuf of packet mt */
 
 			CTR3(KTR_IGMPV3, "%s: merging %p with ifscq tail %p)",
 			    __func__, m0, mt);
 
 			/*
 			 * m0 becomes an interior part of mt's chain, so it
 			 * stops being a packet header; its length and record
 			 * count are folded into mt's header. vt_nrecs is
 			 * still read from m0 after its M_PKTHDR flag has
 			 * been cleared.
 			 */
 			mtl = m_last(mt);
 			m0->m_flags &= ~M_PKTHDR;
 			mt->m_pkthdr.len += recslen;
 			mt->m_pkthdr.PH_vt.vt_nrecs +=
 			    m0->m_pkthdr.PH_vt.vt_nrecs;
 
 			mtl->m_next = m0;
 		}
 	}
 
 	return (0);
 }
 
 /*
  * Answer a pending IGMPv3 General Query on one interface.
  *
  * Every IPv4 group on the interface that is in a reporting-capable state
  * is switched to IGMP_REPORTING_MEMBER and has a group record enqueued on
  * igi->igi_gq.  A burst of the queue is then dispatched; if packets are
  * left over, the interface timer is re-armed so the rest goes out later.
  */
 static void
 igmp_v3_dispatch_general_query(struct igmp_ifinfo *igi)
 {
 	INIT_VNET_INET(curvnet);
 	struct ifmultiaddr	*mca, *mca_next;
 	struct in_multi		*grp;
 	struct ifnet		*ifp;
 	int			 loopback, nqueued;
 
 	IN_MULTI_LOCK_ASSERT();
 	IGMP_LOCK_ASSERT();
 
 	KASSERT(igi->igi_version == IGMP_VERSION_3,
 	    ("%s: called when version %d", __func__, igi->igi_version));
 
 	ifp = igi->igi_ifp;
 
 	IF_ADDR_LOCK(ifp);
 	TAILQ_FOREACH_SAFE(mca, &ifp->if_multiaddrs, ifma_link, mca_next) {
 		/* Only IPv4 memberships with protocol state attached. */
 		if (ifma_is_not_inet_or_unbound:
 		    mca->ifma_addr->sa_family != AF_INET)
 			continue;
 	}
 	IF_ADDR_UNLOCK(ifp);
 }
 
 /*
  * Transmit the next pending IGMP message in the output queue.
  *
  * We get called from netisr_processqueue(). A mutex private to igmpoq
  * will be acquired and released around this routine.
  *
  * The mbuf chain m is always consumed: it is either handed to ip_output()
  * or freed on one of the drop paths.
  *
  * VIMAGE: Needs to store/restore vnet pointer on a per-mbuf-chain basis.
  * MRT: Nothing needs to be done, as IGMP traffic is always local to
  * a link and uses a link-scope multicast address.
  */
 static void
 igmp_intr(struct mbuf *m)
 {
 	struct ip_moptions	 imo;
 	struct ifnet		*ifp;
 	struct mbuf		*ipopts, *m0;
 	int			 error;
 	uint32_t		 ifindex;
 
 	CTR2(KTR_IGMPV3, "%s: transmit %p", __func__, m);
 
 	/*
 	 * Set VNET image pointer from enqueued mbuf chain
 	 * before doing anything else. Whilst we use interface
 	 * indexes to guard against interface detach, they are
 	 * unique to each VIMAGE and must be retrieved.
 	 */
 	CURVNET_SET((struct vnet *)(m->m_pkthdr.header));
 	INIT_VNET_NET(curvnet);
 	INIT_VNET_INET(curvnet);
 	ifindex = igmp_restore_context(m);
 
 	/*
 	 * Check if the ifnet still exists. This limits the scope of
 	 * any race in the absence of a global ifp lock for low cost
 	 * (an array lookup).
 	 */
 	ifp = ifnet_byindex(ifindex);
 	if (ifp == NULL) {
 		CTR3(KTR_IGMPV3, "%s: dropped %p as ifindex %u went away.",
 		    __func__, m, ifindex);
 		m_freem(m);
 		IPSTAT_INC(ips_noroute);
 		goto out;
 	}
 
 	/* Attach the Router Alert option only if the sysctl asks for it. */
 	ipopts = V_igmp_sendra ? m_raopt : NULL;
 
 	imo.imo_multicast_ttl  = 1;
 	imo.imo_multicast_vif  = -1;
 	imo.imo_multicast_loop = (V_ip_mrouter != NULL);
 
 	/*
 	 * If the user requested that IGMP traffic be explicitly
 	 * redirected to the loopback interface (e.g. they are running a
 	 * MANET interface and the routing protocol needs to see the
 	 * updates), handle this now.
 	 */
 	if (m->m_flags & M_IGMP_LOOP)
 		imo.imo_multicast_ifp = V_loif;
 	else
 		imo.imo_multicast_ifp = ifp;
 
 	if (m->m_flags & M_IGMPV2) {
 		/* IGMPv2 messages are sent as queued, without encapsulation. */
 		m0 = m;
 	} else {
 		m0 = igmp_v3_encap_report(ifp, m);
 		if (m0 == NULL) {
 			/*
 			 * The only failure in igmp_v3_encap_report() is
 			 * M_PREPEND() failing, and that has already freed
 			 * the chain.  Freeing m again here would be a
 			 * double free, so just account for the drop.
 			 */
 			CTR2(KTR_IGMPV3, "%s: dropped %p", __func__, m);
 			IPSTAT_INC(ips_odropped);
 			goto out;
 		}
 	}
 
 	igmp_scrub_context(m0);
 	/*
 	 * NOTE(review): the protocol flags are cleared on m, not m0.  The two
 	 * differ only when the encapsulation had to prepend a new mbuf;
 	 * confirm whether the new head should be cleared as well.
 	 */
 	m->m_flags &= ~(M_PROTOFLAGS);
 	m0->m_pkthdr.rcvif = V_loif;
 #ifdef MAC
 	mac_netinet_igmp_send(ifp, m0);
 #endif
 	error = ip_output(m0, ipopts, NULL, 0, &imo, NULL);
 	if (error) {
 		CTR3(KTR_IGMPV3, "%s: ip_output(%p) = %d", __func__, m0, error);
 		goto out;
 	}
 
 	IGMPSTAT_INC(igps_snd_reports);
 
 out:
 	/*
 	 * We must restore the existing vnet pointer before
 	 * continuing as we are run from netisr context.
 	 */
 	CURVNET_RESTORE();
 }
 
 /*
  * Encapsulate an IGMPv3 report.
  *
  * The internal mbuf flag M_IGMPV3_HDR is used to indicate that the mbuf
  * chain has already had its IP/IGMPv3 header prepended. In this case
  * the function will not attempt to prepend; the lengths and checksums
  * will however be re-computed.
  *
  * Returns a pointer to the new mbuf chain head, or NULL if the
  * allocation failed.
  */
 static struct mbuf *
 igmp_v3_encap_report(struct ifnet *ifp, struct mbuf *m)
 {
 	INIT_VNET_INET(curvnet);
 	struct igmp_report	*igmp;
 	struct ip		*ip;
 	int			 hdrlen, igmpreclen;
 
 	KASSERT((m->m_flags & M_PKTHDR),
 	    ("%s: mbuf chain %p is !M_PKTHDR", __func__, m));
 
 	igmpreclen = m_length(m, NULL);
 	hdrlen = sizeof(struct ip) + sizeof(struct igmp_report);
 
 	if (m->m_flags & M_IGMPV3_HDR) {
 		igmpreclen -= hdrlen;
 	} else {
 		M_PREPEND(m, hdrlen, M_DONTWAIT);
 		if (m == NULL)
 			return (NULL);
 		m->m_flags |= M_IGMPV3_HDR;
 	}
 
 	CTR2(KTR_IGMPV3, "%s: igmpreclen is %d", __func__, igmpreclen);
 
 	m->m_data += sizeof(struct ip);
 	m->m_len -= sizeof(struct ip);
 
 	igmp = mtod(m, struct igmp_report *);
 	igmp->ir_type = IGMP_v3_HOST_MEMBERSHIP_REPORT;
 	igmp->ir_rsv1 = 0;
 	igmp->ir_rsv2 = 0;
 	igmp->ir_numgrps = htons(m->m_pkthdr.PH_vt.vt_nrecs);
 	igmp->ir_cksum = 0;
 	igmp->ir_cksum = in_cksum(m, sizeof(struct igmp_report) + igmpreclen);
 	m->m_pkthdr.PH_vt.vt_nrecs = 0;
 
 	m->m_data -= sizeof(struct ip);
 	m->m_len += sizeof(struct ip);
 
 	ip = mtod(m, struct ip *);
 	ip->ip_tos = IPTOS_PREC_INTERNETCONTROL;
 	ip->ip_len = hdrlen + igmpreclen;
 	ip->ip_off = IP_DF;
 	ip->ip_p = IPPROTO_IGMP;
 	ip->ip_sum = 0;
 
 	ip->ip_src.s_addr = INADDR_ANY;
 
 	if (m->m_flags & M_IGMP_LOOP) {
 		struct in_ifaddr *ia;
 
 		IFP_TO_IA(ifp, ia);
 		if (ia != NULL)
 			ip->ip_src = ia->ia_addr.sin_addr;
 	}
 
 	ip->ip_dst.s_addr = htonl(INADDR_ALLRPTS_GROUP);
 
 	return (m);
 }
 
 #ifdef KTR
 /*
  * Map an IGMPv3 group record type to a short name for trace output.
  * Unrecognised values yield "unknown".
  */
 static char *
 igmp_rec_type_to_str(const int type)
 {
 
 	switch (type) {
 	case IGMP_CHANGE_TO_EXCLUDE_MODE:
 		return ("TO_EX");
 	case IGMP_CHANGE_TO_INCLUDE_MODE:
 		return ("TO_IN");
 	case IGMP_MODE_IS_EXCLUDE:
 		return ("MODE_EX");
 	case IGMP_MODE_IS_INCLUDE:
 		return ("MODE_IN");
 	case IGMP_ALLOW_NEW_SOURCES:
 		return ("ALLOW_NEW");
 	case IGMP_BLOCK_OLD_SOURCES:
 		return ("BLOCK_OLD");
 	default:
 		return ("unknown");
 	}
 }
 #endif
 
 /*
  * One-time IGMP setup: initialise the IGMP lock, allocate the mbuf held
  * in m_raopt via igmp_ra_alloc(), and register the igmp_nh handler with
  * netisr.
  */
 static void
 igmp_sysinit(void)
 {
 
 	CTR1(KTR_IGMPV3, "%s: initializing", __func__);
 
 	IGMP_LOCK_INIT();
 
 	m_raopt = igmp_ra_alloc();
 
-	netisr2_register(&igmp_nh);
+	netisr_register(&igmp_nh);
 }
 
 /*
  * Undo igmp_sysinit() in reverse order: unregister the igmp_nh handler
  * from netisr, release the mbuf held in m_raopt, and destroy the IGMP
  * lock.
  */
 static void
 igmp_sysuninit(void)
 {
 
 	CTR1(KTR_IGMPV3, "%s: tearing down", __func__);
 
-	netisr2_unregister(&igmp_nh);
+	netisr_unregister(&igmp_nh);
 
 	/* Clear the pointer so nothing can pick up the freed mbuf. */
 	m_free(m_raopt);
 	m_raopt = NULL;
 
 	IGMP_LOCK_DESTROY();
 }
 
 /*
  * Bring up one IGMPv3 instance: empty interface-info list, no timers
  * running, default tunables and a zeroed statistics block.
  * VIMAGE: Assumes curvnet set by caller and called per vimage.
  */
 static int
 vnet_igmp_iattach(const void *unused __unused)
 {
 	INIT_VNET_INET(curvnet);
 
 	CTR1(KTR_IGMPV3, "%s: initializing", __func__);
 
 	LIST_INIT(&V_igi_head);
 
 	/* Nothing is pending on a fresh instance. */
 	V_interface_timers_running = 0;
 	V_state_change_timers_running = 0;
 	V_current_state_timers_running = 0;
 
 	/* Statistics start from zero, stamped with version and size. */
 	memset(&V_igmpstat, 0, sizeof(struct igmpstat));
 	V_igmpstat.igps_len = sizeof(struct igmpstat);
 	V_igmpstat.igps_version = IGPS_VERSION_3;
 
 	/*
 	 * Default values for the sysctls.
 	 */
 	V_igmp_default_version = IGMP_VERSION_3;
 	V_igmp_v1enable = 1;
 	V_igmp_v2enable = 1;
 	V_igmp_legacysupp = 0;
 	V_igmp_sendra = 1;
 	V_igmp_sendlocal = 1;
 	V_igmp_recvifkludge = 1;
 	V_igmp_gsrdelay.tv_usec = 0;
 	V_igmp_gsrdelay.tv_sec = 10;
 
 	return (0);
 }
 
 /*
  * Tear down one IGMP instance.  Nothing is released here; the function
  * only asserts that the interface-info list is already empty.
  */
 static int
 vnet_igmp_idetach(const void *unused __unused)
 {
 #ifdef INVARIANTS
 	/* Only the assertion below needs the instance pointer. */
 	INIT_VNET_INET(curvnet);
 #endif
 
 	CTR1(KTR_IGMPV3, "%s: tearing down", __func__);
 	KASSERT(LIST_EMPTY(&V_igi_head),
 	    ("%s: igi list not empty; ifnets not detached?", __func__));
 
 	return (0);
 }
 
 #ifndef VIMAGE_GLOBALS
 static vnet_modinfo_t vnet_igmp_modinfo = {
 	.vmi_id		= VNET_MOD_IGMP,
 	.vmi_name	= "igmp",
 	.vmi_dependson	= VNET_MOD_INET,
 	.vmi_iattach	= vnet_igmp_iattach,
 	.vmi_idetach	= vnet_igmp_idetach
 };
 #endif
 
 /*
  * Module event handler for IGMP.
  *
  * MOD_LOAD runs the global initialisation and then attaches the per-vnet
  * state; MOD_UNLOAD undoes both in the opposite order.  Any other event
  * is rejected with EOPNOTSUPP.
  */
 static int
 igmp_modevent(module_t mod, int type, void *unused __unused)
 {
 
 	if (type == MOD_LOAD) {
 		igmp_sysinit();
 #ifndef VIMAGE_GLOBALS
 		vnet_mod_register(&vnet_igmp_modinfo);
 #else
 		vnet_igmp_iattach(NULL);
 #endif
 		return (0);
 	}
 
 	if (type == MOD_UNLOAD) {
 #ifndef VIMAGE_GLOBALS
 		vnet_mod_deregister(&vnet_igmp_modinfo);
 #else
 		vnet_igmp_idetach(NULL);
 #endif
 		igmp_sysuninit();
 		return (0);
 	}
 
 	return (EOPNOTSUPP);
 }
 
 /* Module descriptor tying the name "igmp" to igmp_modevent(). */
 static moduledata_t igmp_mod = {
 	"igmp",			/* module name */
 	igmp_modevent,		/* event handler */
 	0			/* no extra data passed to the handler */
 };
 DECLARE_MODULE(igmp, igmp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
Index: projects/pnet/sys/netinet/ip_divert.c
===================================================================
--- projects/pnet/sys/netinet/ip_divert.c	(revision 193105)
+++ projects/pnet/sys/netinet/ip_divert.c	(revision 193106)
@@ -1,792 +1,792 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #if !defined(KLD_MODULE)
 #include "opt_inet.h"
 #include "opt_ipfw.h"
 #include "opt_mac.h"
 #include "opt_sctp.h"
 #ifndef INET
 #error "IPDIVERT requires INET."
 #endif
 #ifndef IPFIREWALL
 #error "IPDIVERT requires IPFIREWALL"
 #endif
 #endif
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/kernel.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/rwlock.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 #include <sys/vimage.h>
 
 #include <vm/uma.h>
 
 #include <net/if.h>
 #include <net/netisr.h> 
 #include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_divert.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_fw.h>
 #include <netinet/vinet.h>
 #ifdef SCTP
 #include <netinet/sctp_crc32.h>
 #endif
 
 #include <security/mac/mac_framework.h>
 
 /*
  * Divert sockets
  */
 
 /*
  * Default send and receive buffer sizes for a divert socket: enough
  * space to hold a full IP packet.  The extra 100 bytes are presumably
  * headroom for the accompanying address data -- TODO confirm.
  */
 #define	DIVSNDQ		(65536 + 100)
 #define	DIVRCVQ		(65536 + 100)
 
 /*
  * Divert sockets work in conjunction with ipfw, see the divert(4)
  * manpage for features.
  * Internally, packets selected by ipfw in ip_input() or ip_output(),
  * and never diverted before, are passed to the input queue of the
  * divert socket with a given 'divert_port' number (as specified in
  * the matching ipfw rule), and they are tagged with a 16 bit cookie
  * (representing the rule number of the matching ipfw rule), which
  * is passed to process reading from the socket.
  *
  * Packets written to the divert socket are again tagged with a cookie
  * (usually the same as above) and a destination address.
  * If the destination address is INADDR_ANY then the packet is
  * treated as outgoing and sent to ip_output(), otherwise it is
  * treated as incoming and sent to ip_input().
  * In both cases, the packet is tagged with the cookie.
  *
  * On reinjection, processing in ip_input() and ip_output()
  * will be exactly the same as for the original packet, except that
  * ipfw processing will start at the rule number after the one
  * written in the cookie (so, tagging a packet with a cookie of 0
  * will cause it to be effectively considered as a standard packet).
  */
 
 /*
  * Internal variables.
  *
  * The PCB list head and PCB info block are plain file-scope statics only
  * when VIMAGE_GLOBALS is defined; the code reaches them through the
  * V_divcb / V_divcbinfo names.
  */
 #ifdef VIMAGE_GLOBALS
 static struct inpcbhead divcb;
 static struct inpcbinfo divcbinfo;
 #endif
 
 /* Buffer sizes reserved with soreserve() when a divert socket attaches. */
 static u_long	div_sendspace = DIVSNDQ;	/* XXX sysctl ? */
 static u_long	div_recvspace = DIVRCVQ;	/* XXX sysctl ? */
 
 /*
  * Handler for the maxsockets_change event: re-apply the current value of
  * maxsockets as the upper bound on the divert PCB zone.  The tag argument
  * is not used.
  */
 static void
 div_zone_change(void *tag)
 {
 	INIT_VNET_INET(curvnet);
 
 	uma_zone_set_max(V_divcbinfo.ipi_zone, maxsockets);
 }
 
 /*
  * Zone init hook for divert PCBs: set up the per-PCB lock on the item
  * at mem.  size and flags are unused.  Always returns 0.
  */
 static int
 div_inpcb_init(void *mem, int size, int flags)
 {
 	struct inpcb *pcb;
 
 	pcb = mem;
 	INP_LOCK_INIT(pcb, "inp", "divinp");
 
 	return (0);
 }
 
 /*
  * Zone fini hook for divert PCBs: destroy the per-PCB lock of the item
  * at mem.  size is unused.
  */
 static void
 div_inpcb_fini(void *mem, int size)
 {
 	struct inpcb *pcb;
 
 	pcb = mem;
 	INP_LOCK_DESTROY(pcb);
 }
 
 /*
  * Initialise the divert PCB bookkeeping: info lock, PCB list, one-entry
  * hash tables and the PCB zone, and subscribe to maxsockets changes so
  * the zone limit follows them.
  */
 void
 div_init(void)
 {
 	INIT_VNET_INET(curvnet);
 
 	INP_INFO_LOCK_INIT(&V_divcbinfo, "div");
 
 	/* Start with an empty PCB list, reachable through the info block. */
 	LIST_INIT(&V_divcb);
 	V_divcbinfo.ipi_listhead = &V_divcb;
 #ifdef VIMAGE
 	V_divcbinfo.ipi_vnet = curvnet;
 #endif
 
 	/*
 	 * XXX Divert IP never uses the hash lists, but allocating
 	 * single-entry tables is simpler than testing for
 	 * hashbase == NULL everywhere.
 	 */
 	V_divcbinfo.ipi_hashbase =
 	    hashinit(1, M_PCB, &V_divcbinfo.ipi_hashmask);
 	V_divcbinfo.ipi_porthashbase =
 	    hashinit(1, M_PCB, &V_divcbinfo.ipi_porthashmask);
 
 	/* PCB zone, capped at maxsockets and kept in step with it. */
 	V_divcbinfo.ipi_zone = uma_zcreate("divcb", sizeof(struct inpcb),
 	    NULL, NULL, div_inpcb_init, div_inpcb_fini, UMA_ALIGN_PTR,
 	    UMA_ZONE_NOFREE);
 	uma_zone_set_max(V_divcbinfo.ipi_zone, maxsockets);
 	EVENTHANDLER_REGISTER(maxsockets_change, div_zone_change,
 	    NULL, EVENTHANDLER_PRI_ANY);
 }
 
 /*
  * Input entry point for IPPROTO_DIVERT.  That number is not part of the
  * real IP protocol space, so this should never run; if it does, the
  * packet is discarded and counted as "no protocol".  off is unused.
  */
 void
 div_input(struct mbuf *m, int off)
 {
 	INIT_VNET_INET(curvnet);
 
 	m_freem(m);
 	IPSTAT_INC(ips_noproto);
 }
 
 /*
  * Divert a packet by passing it up to the divert socket bound to the port
  * recorded in the packet's PACKET_TAG_DIVERT tag.
  *
  * A sockaddr_in describing the packet is built (matching rule cookie in
  * sin_port, receive interface address and name where available) and
  * appended, together with the mbuf chain, to the receive buffer of the
  * first divert socket whose local port matches.
  *
  * m is always consumed: it is queued on a socket or freed.  "incoming"
  * is non-zero when the packet was received rather than being sent.
  */
 static void
 divert_packet(struct mbuf *m, int incoming)
 {
 	INIT_VNET_INET(curvnet);
 	struct ip *ip;
 	struct inpcb *inp;
 	struct socket *sa;
 	u_int16_t nport;
 	struct sockaddr_in divsrc;
 	struct m_tag *mtag;
 
 	/* Without a divert tag there is no target port; drop the packet. */
 	mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL);
 	if (mtag == NULL) {
 		printf("%s: no divert tag\n", __func__);
 		m_freem(m);
 		return;
 	}
 	/* Assure header */
 	if (m->m_len < sizeof(struct ip) &&
 	    (m = m_pullup(m, sizeof(struct ip))) == 0)
 		return;
 	ip = mtod(m, struct ip *);
 
 	/*
 	 * Delayed checksums are currently not compatible with divert.
 	 * Compute them now; ip_len is swapped to host order around the
 	 * call and back again afterwards.
 	 */
 	if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 		ip->ip_len = ntohs(ip->ip_len);
 		in_delayed_cksum(m);
 		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
 		ip->ip_len = htons(ip->ip_len);
 	}
 #ifdef SCTP
 	if (m->m_pkthdr.csum_flags & CSUM_SCTP) {
 		ip->ip_len = ntohs(ip->ip_len);
 		sctp_delayed_cksum(m);
 		m->m_pkthdr.csum_flags &= ~CSUM_SCTP;
 		ip->ip_len = htons(ip->ip_len);
 	}
 #endif
 	/*
 	 * Record receive interface address, if any.
 	 * But only for incoming packets.
 	 */
 	bzero(&divsrc, sizeof(divsrc));
 	divsrc.sin_len = sizeof(divsrc);
 	divsrc.sin_family = AF_INET;
 	divsrc.sin_port = divert_cookie(mtag);	/* record matching rule */
 	if (incoming) {
 		struct ifaddr *ifa;
 		struct ifnet *ifp;
 
 		/* Sanity check */
 		M_ASSERTPKTHDR(m);
 
 		/*
 		 * Find IP address for receive interface: the first AF_INET
 		 * address on its list is used.
 		 * NOTE(review): rcvif is dereferenced without a NULL check
 		 * here -- presumably incoming packets always carry one;
 		 * confirm against the callers.
 		 */
 		ifp = m->m_pkthdr.rcvif;
 		IF_ADDR_LOCK(ifp);
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family != AF_INET)
 				continue;
 			divsrc.sin_addr =
 			    ((struct sockaddr_in *) ifa->ifa_addr)->sin_addr;
 			break;
 		}
 		IF_ADDR_UNLOCK(ifp);
 	}
 	/*
 	 * Record the incoming interface name whenever we have one.
 	 */
 	if (m->m_pkthdr.rcvif) {
 		/*
 		 * Hide the actual interface name in there in the
 		 * sin_zero array. XXX This needs to be moved to a
 		 * different sockaddr type for divert, e.g.
 		 * sockaddr_div with multiple fields like
 		 * sockaddr_dl. Presently we have only 7 bytes
 		 * but that will do for now as most interfaces
 		 * are 4 or less + 2 or less bytes for unit.
 		 * There is probably a faster way of doing this,
 		 * possibly taking it from the sockaddr_dl on the iface.
 		 * This solves the problem of a P2P link and a LAN interface
 		 * having the same address, which can result in the wrong
 		 * interface being assigned to the packet when fed back
 		 * into the divert socket. Theoretically if the daemon saves
 		 * and re-uses the sockaddr_in as suggested in the man pages,
 		 * this iface name will come along for the ride.
 		 * (see div_output for the other half of this.)
 		 */
 		strlcpy(divsrc.sin_zero, m->m_pkthdr.rcvif->if_xname,
 		    sizeof(divsrc.sin_zero));
 	}
 
 	/* Put packet on socket queue, if any */
 	sa = NULL;
 	nport = htons((u_int16_t)divert_info(mtag));
 	INP_INFO_RLOCK(&V_divcbinfo);
 	LIST_FOREACH(inp, &V_divcb, inp_list) {
 		/* XXX why does only one socket match? */
 		if (inp->inp_lport == nport) {
 			INP_RLOCK(inp);
 			sa = inp->inp_socket;
 			SOCKBUF_LOCK(&sa->so_rcv);
 			/*
 			 * On success sorwakeup_locked() is responsible for
 			 * dropping the sockbuf lock; on failure we drop it
 			 * ourselves.
 			 */
 			if (sbappendaddr_locked(&sa->so_rcv,
 			    (struct sockaddr *)&divsrc, m,
 			    (struct mbuf *)0) == 0) {
 				SOCKBUF_UNLOCK(&sa->so_rcv);
 				sa = NULL;	/* force mbuf reclaim below */
 			} else
 				sorwakeup_locked(sa);
 			INP_RUNLOCK(inp);
 			break;
 		}
 	}
 	INP_INFO_RUNLOCK(&V_divcbinfo);
 	/* No listener, or its buffer was full: drop and fix up the stats. */
 	if (sa == NULL) {
 		m_freem(m);
 		IPSTAT_INC(ips_noproto);
 		IPSTAT_DEC(ips_delivered);
         }
 }
 
 /*
  * Deliver packet back into the IP processing machinery.
  *
  * If no address specified, or address is 0.0.0.0, send to ip_output();
  * otherwise, send to ip_input() and mark as having been received on
  * the interface with that address.
  *
  * Both m and control are consumed on every path.  Returns 0 or an errno
  * value: ENOBUFS when a tag or the option copy cannot be allocated,
  * EINVAL for a malformed outgoing header, EADDRNOTAVAIL when no receive
  * interface can be found for an incoming packet, or whatever ip_output()
  * returns.  The result of queueing to netisr is not reported.
  */
 static int
 div_output(struct socket *so, struct mbuf *m, struct sockaddr_in *sin,
     struct mbuf *control)
 {
 	INIT_VNET_INET(curvnet);
 	struct m_tag *mtag;
 	struct divert_tag *dt;
 	int error = 0;
 	struct mbuf *options;
 
 	/*
 	 * The mbuf may not have come from userland, but we pretend
 	 * that it has.
 	 */
 	m->m_pkthdr.rcvif = NULL;
 	m->m_nextpkt = NULL;
 	M_SETFIB(m, so->so_fibnum);
 
 	/* Control data is not interpreted; just release it. */
 	if (control)
 		m_freem(control);		/* XXX */
 
 	/* Reuse an existing divert tag, or attach a zeroed new one. */
 	if ((mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL)) == NULL) {
 		mtag = m_tag_get(PACKET_TAG_DIVERT, sizeof(struct divert_tag),
 		    M_NOWAIT | M_ZERO);
 		if (mtag == NULL) {
 			error = ENOBUFS;
 			goto cantsend;
 		}
 		dt = (struct divert_tag *)(mtag+1);
 		m_tag_prepend(m, mtag);
 	} else
 		dt = (struct divert_tag *)(mtag+1);
 
 	/* Loopback avoidance and state recovery */
 	if (sin) {
 		int i;
 
 		dt->cookie = sin->sin_port;
 		/*
 		 * Find receive interface with the given name, stuffed
 		 * (if it exists) in the sin_zero[] field.
 		 * The name is user supplied data so don't trust its size
 		 * or that it is zero terminated.
 		 */
 		for (i = 0; i < sizeof(sin->sin_zero) && sin->sin_zero[i]; i++)
 			;
 		/* Only a non-empty, NUL-terminated name is looked up. */
 		if ( i > 0 && i < sizeof(sin->sin_zero))
 			m->m_pkthdr.rcvif = ifunit(sin->sin_zero);
 	}
 
 	/* Reinject packet into the system as incoming or outgoing */
 	if (!sin || sin->sin_addr.s_addr == 0) {
 		struct ip *const ip = mtod(m, struct ip *);
 		struct inpcb *inp;
 
 		dt->info |= IP_FW_DIVERT_OUTPUT_FLAG;
 		INP_INFO_WLOCK(&V_divcbinfo);
 		inp = sotoinpcb(so);
 		INP_RLOCK(inp);
 		/*
 		 * Don't allow both user specified and setsockopt options,
 		 * and don't allow packet length sizes that will crash
 		 */
 		if (((ip->ip_hl != (sizeof (*ip) >> 2)) && inp->inp_options) ||
 		     ((u_short)ntohs(ip->ip_len) > m->m_pkthdr.len)) {
 			error = EINVAL;
 			INP_RUNLOCK(inp);
 			INP_INFO_WUNLOCK(&V_divcbinfo);
 			m_freem(m);
 		} else {
 			/* Convert fields to host order for ip_output() */
 			ip->ip_len = ntohs(ip->ip_len);
 			ip->ip_off = ntohs(ip->ip_off);
 
 			/* Send packet to output processing */
 			IPSTAT_INC(ips_rawout);			/* XXX */
 
 #ifdef MAC
 			mac_inpcb_create_mbuf(inp, m);
 #endif
 			/*
 			 * Get ready to inject the packet into ip_output().
 			 * Just in case socket options were specified on the
 			 * divert socket, we duplicate them.  This is done
 			 * to avoid having to hold the PCB locks over the call
 			 * to ip_output(), as doing this results in a number of
 			 * lock ordering complexities.
 			 *
 			 * Note that we set the multicast options argument for
 			 * ip_output() to NULL since it should be invariant that
 			 * they are not present.
 			 */
 			KASSERT(inp->inp_moptions == NULL,
 			    ("multicast options set on a divert socket"));
 			options = NULL;
 			/*
 			 * XXXCSJP: It is unclear to me whether or not it makes
 			 * sense for divert sockets to have options.  However,
 			 * for now we will duplicate them with the INP locks
 			 * held so we can use them in ip_output() without
 			 * requiring a reference to the pcb.
 			 */
 			if (inp->inp_options != NULL) {
 				options = m_dup(inp->inp_options, M_DONTWAIT);
 				if (options == NULL)
 					error = ENOBUFS;
 			}
 			INP_RUNLOCK(inp);
 			INP_INFO_WUNLOCK(&V_divcbinfo);
 			/* The failure is acted on only after the locks drop. */
 			if (error == ENOBUFS) {
 				m_freem(m);
 				return (error);
 			}
 			error = ip_output(m, options, NULL,
 			    ((so->so_options & SO_DONTROUTE) ?
 			    IP_ROUTETOIF : 0) | IP_ALLOWBROADCAST |
 			    IP_RAWOUTPUT, NULL, NULL);
 			if (options != NULL)
 				m_freem(options);
 		}
 	} else {
 		dt->info |= IP_FW_DIVERT_LOOPBACK_FLAG;
 		if (m->m_pkthdr.rcvif == NULL) {
 			/*
 			 * No luck with the name, check by IP address.
 			 * Clear the port and the ifname to make sure
 			 * there are no distractions for ifa_ifwithaddr.
 			 */
 			struct	ifaddr *ifa;
 
 			bzero(sin->sin_zero, sizeof(sin->sin_zero));
 			sin->sin_port = 0;
 			ifa = ifa_ifwithaddr((struct sockaddr *) sin);
 			if (ifa == NULL) {
 				error = EADDRNOTAVAIL;
 				goto cantsend;
 			}
 			m->m_pkthdr.rcvif = ifa->ifa_ifp;
 		}
 #ifdef MAC
 		SOCK_LOCK(so);
 		mac_socket_create_mbuf(so, m);
 		SOCK_UNLOCK(so);
 #endif
 		/* Send packet to input processing via netisr */
-		netisr2_queue_src(NETISR_IP, (uintptr_t)so, m);
+		netisr_queue_src(NETISR_IP, (uintptr_t)so, m);
 	}
 
 	return error;
 
 	/* Shared failure exit: the packet has not been handed off yet. */
 cantsend:
 	m_freem(m);
 	return error;
 }
 
 /*
  * Attach protocol state to a new divert socket.
  *
  * Checks PRIV_NETINET_DIVERT when a thread is supplied, reserves the
  * socket buffers, allocates the PCB and marks it as an IPv4,
  * header-included endpoint.  Returns 0 or the first error encountered.
  */
 static int
 div_attach(struct socket *so, int proto, struct thread *td)
 {
 	INIT_VNET_INET(so->so_vnet);
 	struct inpcb *pcb;
 	int rc;
 
 	pcb = sotoinpcb(so);
 	KASSERT(pcb == NULL, ("div_attach: inp != NULL"));
 
 	if (td != NULL) {
 		rc = priv_check(td, PRIV_NETINET_DIVERT);
 		if (rc != 0)
 			return (rc);
 	}
 
 	rc = soreserve(so, div_sendspace, div_recvspace);
 	if (rc != 0)
 		return (rc);
 
 	/* Allocate the PCB under the info lock and pick it up on success. */
 	INP_INFO_WLOCK(&V_divcbinfo);
 	rc = in_pcballoc(so, &V_divcbinfo);
 	if (rc == 0)
 		pcb = (struct inpcb *)so->so_pcb;
 	INP_INFO_WUNLOCK(&V_divcbinfo);
 	if (rc != 0)
 		return (rc);
 
 	pcb->inp_flags |= INP_HDRINCL;
 	pcb->inp_vflag |= INP_IPV4;
 	pcb->inp_ip_p = proto;
 	INP_WUNLOCK(pcb);
 
 	return (0);
 }
 
 /*
  * Detach and free the PCB of a divert socket.  The PCB write lock taken
  * here has no matching unlock in this function.
  */
 static void
 div_detach(struct socket *so)
 {
 	INIT_VNET_INET(so->so_vnet);
 	struct inpcb *pcb;
 
 	pcb = sotoinpcb(so);
 	KASSERT(pcb != NULL, ("div_detach: inp == NULL"));
 
 	INP_INFO_WLOCK(&V_divcbinfo);
 	INP_WLOCK(pcb);
 	in_pcbdetach(pcb);
 	in_pcbfree(pcb);
 	INP_INFO_WUNLOCK(&V_divcbinfo);
 }
 
 /*
  * Bind a divert socket.
  *
  * Only AF_INET names are accepted (EAFNOSUPPORT otherwise).  The address
  * part of the caller's sockaddr is overwritten with INADDR_ANY before it
  * is passed to in_pcbbind(), whose result is returned.
  */
 static int
 div_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	INIT_VNET_INET(so->so_vnet);
 	struct sockaddr_in *sin;
 	struct inpcb *pcb;
 	int rc;
 
 	pcb = sotoinpcb(so);
 	KASSERT(pcb != NULL, ("div_bind: inp == NULL"));
 
 	/*
 	 * in_pcbbind() takes nam to be a sockaddr_in and insists on a
 	 * valid address.  Divert sockets have no such requirement, so the
 	 * address is filled in here on the caller's behalf.
 	 * XXX -- divert should not be abusing in_pcbbind() and should
 	 * probably have its own family.
 	 */
 	if (nam->sa_family != AF_INET)
 		return (EAFNOSUPPORT);
 	sin = (struct sockaddr_in *)nam;
 	sin->sin_addr.s_addr = INADDR_ANY;
 
 	INP_INFO_WLOCK(&V_divcbinfo);
 	INP_WLOCK(pcb);
 	rc = in_pcbbind(pcb, nam, td->td_ucred);
 	INP_WUNLOCK(pcb);
 	INP_INFO_WUNLOCK(&V_divcbinfo);
 
 	return (rc);
 }
 
 /*
  * Shut down the sending side of a divert socket: socantsendmore() is
  * called with the PCB write-locked.  Always returns 0.
  */
 static int
 div_shutdown(struct socket *so)
 {
 	struct inpcb *pcb;
 
 	pcb = sotoinpcb(so);
 	KASSERT(pcb != NULL, ("div_shutdown: inp == NULL"));
 
 	INP_WLOCK(pcb);
 	socantsendmore(so);
 	INP_WUNLOCK(pcb);
 
 	return (0);
 }
 
 static int
 div_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
     struct mbuf *control, struct thread *td)
 {
 	INIT_VNET_INET(so->so_vnet);
 
 	/* Packet must have a header (but that's about it) */
 	if (m->m_len < sizeof (struct ip) &&
 	    (m = m_pullup(m, sizeof (struct ip))) == 0) {
 		IPSTAT_INC(ips_toosmall);
 		m_freem(m);
 		return EINVAL;
 	}
 
 	/* Send packet */
 	return div_output(so, m, (struct sockaddr_in *)nam, control);
 }
 
 /*
  * Control-input hook for divert.  The address in sa and the command are
  * inspected, but no action is taken for any combination; every path
  * simply returns.  vip is unused.
  */
 void
 div_ctlinput(int cmd, struct sockaddr *sa, void *vip)
 {
 	struct in_addr faddr;
 
 	faddr = ((struct sockaddr_in *)sa)->sin_addr;
 	if (sa->sa_family == AF_INET && faddr.s_addr != INADDR_ANY) {
 		if (PRC_IS_REDIRECT(cmd))
 			return;
 	}
 }
 
 static int
 div_pcblist(SYSCTL_HANDLER_ARGS)
 {
 	INIT_VNET_INET(curvnet);
 	int error, i, n;
 	struct inpcb *inp, **inp_list;
 	inp_gen_t gencnt;
 	struct xinpgen xig;
 
 	/*
 	 * The process of preparing the TCB list is too time-consuming and
 	 * resource-intensive to repeat twice on every request.
 	 */
 	if (req->oldptr == 0) {
 		n = V_divcbinfo.ipi_count;
 		req->oldidx = 2 * (sizeof xig)
 			+ (n + n/8) * sizeof(struct xinpcb);
 		return 0;
 	}
 
 	if (req->newptr != 0)
 		return EPERM;
 
 	/*
 	 * OK, now we're committed to doing something.
 	 */
 	INP_INFO_RLOCK(&V_divcbinfo);
 	gencnt = V_divcbinfo.ipi_gencnt;
 	n = V_divcbinfo.ipi_count;
 	INP_INFO_RUNLOCK(&V_divcbinfo);
 
 	error = sysctl_wire_old_buffer(req,
 	    2 * sizeof(xig) + n*sizeof(struct xinpcb));
 	if (error != 0)
 		return (error);
 
 	xig.xig_len = sizeof xig;
 	xig.xig_count = n;
 	xig.xig_gen = gencnt;
 	xig.xig_sogen = so_gencnt;
 	error = SYSCTL_OUT(req, &xig, sizeof xig);
 	if (error)
 		return error;
 
 	inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
 	if (inp_list == 0)
 		return ENOMEM;
 	
 	INP_INFO_RLOCK(&V_divcbinfo);
 	for (inp = LIST_FIRST(V_divcbinfo.ipi_listhead), i = 0; inp && i < n;
 	     inp = LIST_NEXT(inp, inp_list)) {
 		INP_RLOCK(inp);
 		if (inp->inp_gencnt <= gencnt &&
 		    cr_canseeinpcb(req->td->td_ucred, inp) == 0)
 			inp_list[i++] = inp;
 		INP_RUNLOCK(inp);
 	}
 	INP_INFO_RUNLOCK(&V_divcbinfo);
 	n = i;
 
 	error = 0;
 	for (i = 0; i < n; i++) {
 		inp = inp_list[i];
 		INP_RLOCK(inp);
 		if (inp->inp_gencnt <= gencnt) {
 			struct xinpcb xi;
 			bzero(&xi, sizeof(xi));
 			xi.xi_len = sizeof xi;
 			/* XXX should avoid extra copy */
 			bcopy(inp, &xi.xi_inp, sizeof *inp);
 			if (inp->inp_socket)
 				sotoxsocket(inp->inp_socket, &xi.xi_socket);
 			INP_RUNLOCK(inp);
 			error = SYSCTL_OUT(req, &xi, sizeof xi);
 		} else
 			INP_RUNLOCK(inp);
 	}
 	if (!error) {
 		/*
 		 * Give the user an updated idea of our state.
 		 * If the generation differs from what we told
 		 * her before, she knows that something happened
 		 * while we were processing this request, and it
 		 * might be necessary to retry.
 		 */
 		INP_INFO_RLOCK(&V_divcbinfo);
 		xig.xig_gen = V_divcbinfo.ipi_gencnt;
 		xig.xig_sogen = so_gencnt;
 		xig.xig_count = V_divcbinfo.ipi_count;
 		INP_INFO_RUNLOCK(&V_divcbinfo);
 		error = SYSCTL_OUT(req, &xig, sizeof xig);
 	}
 	free(inp_list, M_TEMP);
 	return error;
 }
 
 /*
  * Sysctl tree for divert sockets: the "divert" node under _net_inet and a
  * read-only "pcblist" entry served by div_pcblist().
  */
 #ifdef SYSCTL_NODE
 SYSCTL_NODE(_net_inet, IPPROTO_DIVERT, divert, CTLFLAG_RW, 0, "IPDIVERT");
 SYSCTL_PROC(_net_inet_divert, OID_AUTO, pcblist, CTLFLAG_RD, 0, 0,
 	    div_pcblist, "S,xinpcb", "List of active divert sockets");
 #endif
 
 /*
  * User-request table for divert sockets.  Divert-specific handlers
  * (div_*) cover attach, bind, detach, send and shutdown; the remaining
  * entries point at in_* routines.
  */
 struct pr_usrreqs div_usrreqs = {
 	.pru_attach =		div_attach,
 	.pru_bind =		div_bind,
 	.pru_control =		in_control,
 	.pru_detach =		div_detach,
 	.pru_peeraddr =		in_getpeeraddr,
 	.pru_send =		div_send,
 	.pru_shutdown =		div_shutdown,
 	.pru_sockaddr =		in_getsockaddr,
 	.pru_sosetlabel =	in_pcbsosetlabel
 };
 
 /*
  * Protocol switch entry for IPPROTO_DIVERT raw sockets.  It is handed to
  * pf_proto_register() by div_modevent() when the module is loaded.
  */
 struct protosw div_protosw = {
 	.pr_type =		SOCK_RAW,
 	.pr_protocol =		IPPROTO_DIVERT,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
 	.pr_input =		div_input,
 	.pr_ctlinput =		div_ctlinput,
 	.pr_ctloutput =		ip_ctloutput,
 	.pr_init =		div_init,
 	.pr_usrreqs =		&div_usrreqs
 };
 
 /*
  * Module event handler for ipdivert.
  *
  * MOD_LOAD:    register the divert protocol and, only if that succeeded,
  *              install the divert_packet hook in ip_divert_ptr.
  * MOD_QUIESCE: refuse with EPERM so that only a forced unload proceeds.
  * MOD_UNLOAD:  fail with EBUSY while divert sockets exist; otherwise
  *              remove the hook, unregister the protocol and tear down the
  *              pcbinfo lock and zone.
  * Any other event type yields EOPNOTSUPP.
  */
 static int
 div_modevent(module_t mod, int type, void *unused)
 {
 	INIT_VNET_INET(curvnet); /* XXX move to iattach - revisit!!! */
 	int err = 0;
 	int n;
 
 	switch (type) {
 	case MOD_LOAD:
 		/*
 		 * Protocol will be initialized by pf_proto_register().
 		 * We don't have to register ip_protox because we are not
 		 * a true IP protocol that goes over the wire.
 		 */
 		err = pf_proto_register(PF_INET, &div_protosw);
 		/*
 		 * Do not publish the hook when registration failed: the
 		 * load is reported as failed, and a pointer into this
 		 * module must not be left installed in that case.
 		 */
 		if (err != 0)
 			break;
 		ip_divert_ptr = divert_packet;
 		break;
 	case MOD_QUIESCE:
 		/*
 		 * IPDIVERT may normally not be unloaded because of the
 		 * potential race conditions.  Tell kldunload we can't be
 		 * unloaded unless the unload is forced.
 		 */
 		err = EPERM;
 		break;
 	case MOD_UNLOAD:
 		/*
 		 * Forced unload.
 		 *
 		 * Module ipdivert can only be unloaded if no sockets are
 		 * connected.  Maybe this can be changed later to forcefully
 		 * disconnect any open sockets.
 		 *
 		 * XXXRW: Note that there is a slight race here, as a new
 		 * socket open request could be spinning on the lock and then
 		 * we destroy the lock.
 		 */
 		INP_INFO_WLOCK(&V_divcbinfo);
 		n = V_divcbinfo.ipi_count;
 		if (n != 0) {
 			err = EBUSY;
 			INP_INFO_WUNLOCK(&V_divcbinfo);
 			break;
 		}
 		ip_divert_ptr = NULL;
 		err = pf_proto_unregister(PF_INET, IPPROTO_DIVERT, SOCK_RAW);
 		INP_INFO_WUNLOCK(&V_divcbinfo);
 		INP_INFO_LOCK_DESTROY(&V_divcbinfo);
 		uma_zdestroy(V_divcbinfo.ipi_zone);
 		break;
 	default:
 		err = EOPNOTSUPP;
 		break;
 	}
 	return err;
 }
 
 /* Module descriptor: name, event handler, and (unused) private data. */
 static moduledata_t ipdivertmod = {
 	"ipdivert",
 	div_modevent,
 	0
 };
 
 DECLARE_MODULE(ipdivert, ipdivertmod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
 /*
  * The dependency on ipfw belongs to this module; it was previously
  * recorded under the name "dummynet", apparently a copy-and-paste slip.
  */
 MODULE_DEPEND(ipdivert, ipfw, 2, 2, 2);
 MODULE_VERSION(ipdivert, 1);
Index: projects/pnet/sys/netinet/ip_input.c
===================================================================
--- projects/pnet/sys/netinet/ip_input.c	(revision 193105)
+++ projects/pnet/sys/netinet/ip_input.c	(revision 193106)
@@ -1,1835 +1,1835 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_bootp.h"
 #include "opt_ipfw.h"
 #include "opt_ipstealth.h"
 #include "opt_ipsec.h"
 #include "opt_route.h"
 #include "opt_mac.h"
 #include "opt_carp.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/callout.h>
 #include <sys/mbuf.h>
 #include <sys/malloc.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/rwlock.h>
 #include <sys/syslog.h>
 #include <sys/sysctl.h>
 #include <sys/vimage.h>
 
 #include <net/pfil.h>
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/route.h>
 #include <net/netisr.h>
 #include <net/vnet.h>
 #include <net/flowtable.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/ip_options.h>
 #include <machine/in_cksum.h>
 #include <netinet/vinet.h>
 #ifdef DEV_CARP
 #include <netinet/ip_carp.h>
 #endif
 #ifdef IPSEC
 #include <netinet/ip_ipsec.h>
 #endif /* IPSEC */
 
 #include <sys/socketvar.h>
 
 /* XXX: Temporary until ipfw_ether and ipfw_bridge are converted. */
 #include <netinet/ip_fw.h>
 #include <netinet/ip_dummynet.h>
 
 #include <security/mac/mac_framework.h>
 
 #ifdef CTASSERT
 CTASSERT(sizeof(struct ip) == 20);
 #endif
 
 #ifndef VIMAGE
 #ifndef VIMAGE_GLOBALS
 struct vnet_inet vnet_inet_0;
 #endif
 #endif
 
 /*
  * IPv4 state kept as ordinary global/static variables; compiled only when
  * VIMAGE_GLOBALS is defined.  ip_init() assigns their defaults through the
  * corresponding V_-prefixed names.
  */
 #ifdef VIMAGE_GLOBALS
 static int	ipsendredirects;
 static int	ip_checkinterface;
 static int	ip_keepfaith;
 static int	ip_sendsourcequench;
 int	ip_defttl;
 int	ip_do_randomid;
 int	ipforwarding;
 struct	in_ifaddrhead in_ifaddrhead; 		/* first inet address */
 struct	in_ifaddrhashhead *in_ifaddrhashtbl;	/* inet addr hash table  */
 u_long 	in_ifaddrhmask;				/* mask for hash table */
 struct ipstat ipstat;
 static int ip_rsvp_on;
 struct socket *ip_rsvpd;
 int	rsvp_on;
 static struct ipqhead ipq[IPREASS_NHASH];
 static int	maxnipq;	/* Administrative limit on # reass queues. */
 static int	maxfragsperpacket;
 int	ipstealth;
 static int	nipq;	/* Total # of reass queues */
 #endif
 
 SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip, IPCTL_FORWARDING,
     forwarding, CTLFLAG_RW, ipforwarding, 0,
     "Enable IP forwarding between interfaces");
 
 SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip, IPCTL_SENDREDIRECTS,
     redirect, CTLFLAG_RW, ipsendredirects, 0,
     "Enable sending IP redirects");
 
 SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip, IPCTL_DEFTTL,
     ttl, CTLFLAG_RW, ip_defttl, 0, "Maximum TTL on IP packets");
 
 SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip, IPCTL_KEEPFAITH,
     keepfaith, CTLFLAG_RW, ip_keepfaith,	0,
     "Enable packet capture for FAITH IPv4->IPv6 translater daemon");
 
 SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip, OID_AUTO,
     sendsourcequench, CTLFLAG_RW, ip_sendsourcequench, 0,
     "Enable the transmission of source quench packets");
 
 SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip, OID_AUTO, random_id,
     CTLFLAG_RW, ip_do_randomid, 0, "Assign random ip_id values");
 
 /*
  * XXX - Setting ip_checkinterface mostly implements the receive side of
  * the Strong ES model described in RFC 1122, but since the routing table
  * and transmit implementation do not implement the Strong ES model,
  * setting this to 1 results in an odd hybrid.
  *
  * XXX - ip_checkinterface currently must be disabled if you use ipnat
  * to translate the destination address to another local interface.
  *
  * XXX - ip_checkinterface must be disabled if you add IP aliases
  * to the loopback interface instead of the interface where the
  * packets for those addresses are received.
  */
 SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip, OID_AUTO,
     check_interface, CTLFLAG_RW, ip_checkinterface, 0,
     "Verify packet arrives on correct interface");
 
 struct pfil_head inet_pfil_hook;	/* Packet filter hooks */
 
 static struct mbuf	*ip_input_m2flow(struct mbuf *m, uintptr_t source);
 /*
  * netisr handler description for IPv4 input: packets are processed by
  * ip_input(), with ip_input_m2flow() supplied to compute a flow ID.
  * ip_init() registers this handler.
  */
 static struct netisr_handler ip_nh = {
 	.nh_name = "ip",
 	.nh_handler = ip_input,
 	.nh_m2flow = ip_input_m2flow,
 	.nh_proto = NETISR_IP,
 	.nh_qlimit = IFQ_MAXLEN,
 	.nh_policy = NETISR_POLICY_FLOW,
 };
 
 extern	struct domain inetdomain;
 extern	struct protosw inetsw[];
 u_char	ip_protox[IPPROTO_MAX];
 
 
 SYSCTL_V_STRUCT(V_NET, vnet_inet, _net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW,
     ipstat, ipstat, "IP statistics (struct ipstat, netinet/ip_var.h)");
 
 #ifdef VIMAGE_GLOBALS
 static uma_zone_t ipq_zone;
 #endif
 static struct mtx ipqlock;
 
 #define	IPQ_LOCK()	mtx_lock(&ipqlock)
 #define	IPQ_UNLOCK()	mtx_unlock(&ipqlock)
 #define	IPQ_LOCK_INIT()	mtx_init(&ipqlock, "ipqlock", NULL, MTX_DEF)
 #define	IPQ_LOCK_ASSERT()	mtx_assert(&ipqlock, MA_OWNED)
 
 static void	maxnipq_update(void);
 static void	ipq_zone_change(void *);
 
 SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip, OID_AUTO, fragpackets,
     CTLFLAG_RD, nipq, 0,
     "Current number of IPv4 fragment reassembly queue entries");
 
 SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip, OID_AUTO, maxfragsperpacket,
     CTLFLAG_RW, maxfragsperpacket, 0,
     "Maximum number of IPv4 fragments allowed per packet");
 
 struct callout	ipport_tick_callout;
 
 #ifdef IPCTL_DEFMTU
 SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
     &ip_mtu, 0, "Default MTU");
 #endif
 
 #ifdef IPSTEALTH
 SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW,
     ipstealth, 0, "IP stealth mode, no TTL decrementation on forwarding");
 #endif
 static int ip_output_flowtable_size = 2048;
 TUNABLE_INT("net.inet.ip.output_flowtable_size", &ip_output_flowtable_size);
 SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip, OID_AUTO, output_flowtable_size,
     CTLFLAG_RDTUN, ip_output_flowtable_size, 2048,
     "number of entries in the per-cpu output flow caches");
 
 /*
  * ipfw_ether and ipfw_bridge hooks.
  * XXX: Temporary until those are converted to pfil_hooks as well.
  */
 ip_fw_chk_t *ip_fw_chk_ptr = NULL;
 ip_dn_io_t *ip_dn_io_ptr = NULL;
 #ifdef VIMAGE_GLOBALS
 int fw_one_pass;
 #endif
 struct flowtable *ip_ft;
 
 static void	ip_freef(struct ipqhead *, struct ipq *);
 
 #ifndef VIMAGE_GLOBALS
 static void vnet_inet_register(void);
  
 static const vnet_modinfo_t vnet_inet_modinfo = {
 	.vmi_id		= VNET_MOD_INET,
 	.vmi_name	= "inet",
 	.vmi_size	= sizeof(struct vnet_inet)
 };
  
 /* SYSINIT hook: register the "inet" vnet module description. */
 static void
 vnet_inet_register(void)
 {
 
 	vnet_mod_register(&vnet_inet_modinfo);
 }
  
 SYSINIT(inet, SI_SUB_PROTO_BEGIN, SI_ORDER_FIRST, vnet_inet_register, 0);
 #endif
 
 /*
  * Sysctl handler for the IP netisr queue limit.  Reports the current limit
  * of ip_nh; when a new value is supplied, values below 1 are rejected with
  * EINVAL and anything else is applied as the new limit.
  */
 static int
 sysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS)
 {
 	int error, qlimit;
 
-	netisr2_getqlimit(&ip_nh, &qlimit);
+	netisr_getqlimit(&ip_nh, &qlimit);
 	error = sysctl_handle_int(oidp, &qlimit, 0, req);
 	/* Stop here on error or when the request carries no new value. */
 	if (error || !req->newptr)
 		return (error);
 	if (qlimit < 1)
 		return (EINVAL);
-	return (netisr2_setqlimit(&ip_nh, qlimit));
+	return (netisr_setqlimit(&ip_nh, qlimit));
 }
 SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen,
     CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_queue_maxlen, "I",
     "Maximum size of the IP input queue");
 
 /*
  * Sysctl handler for the IP netisr drop counter.  Reports the counter of
  * ip_nh narrowed from 64 bits to an int.  When a new value is supplied it
  * must be 0, in which case the counter is cleared; any other value yields
  * EINVAL.
  */
 static int
 sysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS)
 {
 	u_int64_t qdrops_long;
 	int error, qdrops;
 
-	netisr2_getqdrops(&ip_nh, &qdrops_long);
+	netisr_getqdrops(&ip_nh, &qdrops_long);
 	/* The sysctl is exported as an int, so the value is truncated. */
 	qdrops = qdrops_long;
 	error = sysctl_handle_int(oidp, &qdrops, 0, req);
 	/* Stop here on error or when the request carries no new value. */
 	if (error || !req->newptr)
 		return (error);
 	if (qdrops != 0)
 		return (EINVAL);
-	netisr2_clearqdrops(&ip_nh);
+	netisr_clearqdrops(&ip_nh);
 	return (0);
 }
 
 SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops,
     CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_queue_drops, "I",
     "Number of packets dropped from the IP input queue");
 
 static int ip_m2flow_enable = 1;
 SYSCTL_INT(_net_inet_ip, OID_AUTO, m2flow_enable, CTLFLAG_RW,
     &ip_m2flow_enable, 0,
     "Enable software flow ID calculation for parallel netisr distribution");
 
 /*
  * IP initialization: fill in IP protocol switch table.
  * All protocols not implemented in kernel go to raw IP protocol handler.
  *
  * The first part (defaults, address hash, reassembly queues and zone) runs
  * for every vnet.  Everything after the IS_DEFAULT_VNET() check -- the
  * ip_protox[] table, pfil hook, callout, event handlers, reassembly lock,
  * netisr registration and flowtable -- is set up only for the default
  * vnet.
  */
 void
 ip_init(void)
 {
 	INIT_VNET_INET(curvnet);
 	struct protosw *pr;
 	int i;
 
 	/* Default values for the sysctl-controlled knobs. */
 	V_ipsendredirects = 1; /* XXX */
 	V_ip_checkinterface = 0;
 	V_ip_keepfaith = 0;
 	V_ip_sendsourcequench = 0;
 	V_rsvp_on = 0;
 	V_ip_defttl = IPDEFTTL;
 	V_ip_do_randomid = 0;
 	V_ip_id = time_second & 0xffff;
 	V_ipforwarding = 0;
 	V_ipstealth = 0;
 	V_nipq = 0;	/* Total # of reass queues */
 
 	/* Default port ranges and port randomization settings. */
 	V_ipport_lowfirstauto = IPPORT_RESERVED - 1;	/* 1023 */
 	V_ipport_lowlastauto = IPPORT_RESERVEDSTART;	/* 600 */
 	V_ipport_firstauto = IPPORT_EPHEMERALFIRST;	/* 10000 */
 	V_ipport_lastauto = IPPORT_EPHEMERALLAST;	/* 65535 */
 	V_ipport_hifirstauto = IPPORT_HIFIRSTAUTO;	/* 49152 */
 	V_ipport_hilastauto = IPPORT_HILASTAUTO;	/* 65535 */
 	V_ipport_reservedhigh = IPPORT_RESERVED - 1;	/* 1023 */
 	V_ipport_reservedlow = 0;
 	V_ipport_randomized = 1;	/* user controlled via sysctl */
 	V_ipport_randomcps = 10;	/* user controlled via sysctl */
 	V_ipport_randomtime = 45;	/* user controlled via sysctl */
 	V_ipport_stoprandom = 0;	/* toggled by ipport_tick */
 
 	V_fw_one_pass = 1;
 
 #ifdef NOTYET
 	/* XXX global static but not instantiated in this file */
 	V_ipfastforward_active = 0;
 	V_subnetsarelocal = 0;
 	V_sameprefixcarponly = 0;
 #endif
 
 	/* Interface address list and its hash table. */
 	TAILQ_INIT(&V_in_ifaddrhead);
 	V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask);
 
 	/* Initialize IP reassembly queue. */
 	for (i = 0; i < IPREASS_NHASH; i++)
 		TAILQ_INIT(&V_ipq[i]);
 	V_maxnipq = nmbclusters / 32;
 	V_maxfragsperpacket = 16;
 	V_ipq_zone = uma_zcreate("ipq", sizeof(struct ipq), NULL, NULL, NULL,
 	    NULL, UMA_ALIGN_PTR, 0);
 	/* Push the freshly computed V_maxnipq into the zone limit. */
 	maxnipq_update();
 
 	/* Skip initialization of globals for non-default instances. */
 	if (!IS_DEFAULT_VNET(curvnet))
 		return;
 
 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
 	if (pr == NULL)
 		panic("ip_init: PF_INET not found");
 
 	/* Initialize the entire ip_protox[] array to IPPROTO_RAW. */
 	for (i = 0; i < IPPROTO_MAX; i++)
 		ip_protox[i] = pr - inetsw;
 	/*
 	 * Cycle through IP protocols and put them into the appropriate place
 	 * in ip_protox[].
 	 */
 	for (pr = inetdomain.dom_protosw;
 	    pr < inetdomain.dom_protoswNPROTOSW; pr++)
 		if (pr->pr_domain->dom_family == PF_INET &&
 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) {
 			/* Be careful to only index valid IP protocols. */
 			if (pr->pr_protocol < IPPROTO_MAX)
 				ip_protox[pr->pr_protocol] = pr - inetsw;
 		}
 
 	/* Initialize packet filter hooks. */
 	inet_pfil_hook.ph_type = PFIL_TYPE_AF;
 	inet_pfil_hook.ph_af = AF_INET;
 	/* A registration failure is only reported, not treated as fatal. */
 	if ((i = pfil_head_register(&inet_pfil_hook)) != 0)
 		printf("%s: WARNING: unable to register pfil hook, "
 			"error %d\n", __func__, i);
 
 	/* Start ipport_tick. */
 	callout_init(&ipport_tick_callout, CALLOUT_MPSAFE);
 	callout_reset(&ipport_tick_callout, 1, ipport_tick, NULL);
 	/* ip_fini() stops the callout again at shutdown. */
 	EVENTHANDLER_REGISTER(shutdown_pre_sync, ip_fini, NULL,
 		SHUTDOWN_PRI_DEFAULT);
 	EVENTHANDLER_REGISTER(nmbclusters_change, ipq_zone_change,
 		NULL, EVENTHANDLER_PRI_ANY);
 
 	/* Initialize various other remaining things. */
 	IPQ_LOCK_INIT();
-	netisr2_register(&ip_nh);
+	netisr_register(&ip_nh);
 	ip_ft = flowtable_alloc(ip_output_flowtable_size, FL_PCPU);
 }
 
 /*
  * Shutdown handler (registered by ip_init() for shutdown_pre_sync): stops
  * the ipport_tick callout.  The argument is not used.
  */
 void
 ip_fini(void *xtp)
 {
 
 	callout_stop(&ipport_tick_callout);
 }
 
 /*
  * Derive a flow ID for an IP packet that does not carry one yet.  Only the
  * minimum header validation needed to read the addresses is performed
  * here (a subset of what ip_input() does).  Statistics are touched only
  * when the packet is dropped; accepted packets are accounted for by the
  * regular input path.
  *
  * Returns the (possibly replaced) mbuf, or NULL if the packet was dropped.
  * With ip_m2flow_enable cleared the packet is returned untouched.
  */
 static struct mbuf *
 ip_input_m2flow(struct mbuf *m, uintptr_t source)
 {
 	struct ip *iph;
 	int hdrlen;
 
 	M_ASSERTPKTHDR(m);
 	KASSERT(!(m->m_flags & M_FLOWID),
 	    ("ip_input_m2flow: M_FLOWID already set"));
 
 	if (!ip_m2flow_enable)
 		return (m);
 
 	/* The whole packet must be able to hold a minimal IP header. */
 	if (m->m_pkthdr.len < sizeof(struct ip)) {
 		IPSTAT_INC(ips_tooshort);
 		goto drop;
 	}
 
 	/* Make the header contiguous; m_pullup() frees m on failure. */
 	if (m->m_len < sizeof (struct ip)) {
 		m = m_pullup(m, sizeof(struct ip));
 		if (m == NULL) {
 			IPSTAT_INC(ips_total);
 			IPSTAT_INC(ips_toosmall);
 			return (NULL);
 		}
 	}
 
 	iph = mtod(m, struct ip *);
 	if (iph->ip_v != IPVERSION) {
 		IPSTAT_INC(ips_badvers);
 		goto drop;
 	}
 
 	hdrlen = iph->ip_hl << 2;
 	if (hdrlen < sizeof(struct ip)) {
 		IPSTAT_INC(ips_badhlen);
 		goto drop;
 	}
 
 	/* Flow ID is simply source address XOR destination address. */
 	m->m_flags |= M_FLOWID;
 	m->m_pkthdr.flowid = iph->ip_src.s_addr ^ iph->ip_dst.s_addr;
 	return (m);
 
 drop:
 	IPSTAT_INC(ips_total);
 	m_freem(m);
 	return (NULL);
 }
 
 /*
  * Ip input routine.  Checksum and byte swap header.  If fragmented
  * try to reassemble.  Process options.  Pass to next level.
  *
  * Processing order: header validation, optional ALTQ and pfil hooks,
  * option processing, the "is this packet for us?" decision, and finally
  * either local delivery (label "ours") or forwarding.  The mbuf is
  * consumed on every path.
  */
 void
 ip_input(struct mbuf *m)
 {
 	INIT_VNET_INET(curvnet);
 	struct ip *ip = NULL;
 	struct in_ifaddr *ia = NULL;
 	struct ifaddr *ifa;
 	struct ifnet *ifp;
 	int    checkif, hlen = 0;
 	u_short sum;
 	int dchg = 0;				/* dest changed after fw */
 	struct in_addr odst;			/* original dst address */
 
 	M_ASSERTPKTHDR(m);
 
 	if (m->m_flags & M_FASTFWD_OURS) {
 		/*
 		 * Firewall or NAT changed destination to local.
 		 * We expect ip_len and ip_off to be in host byte order.
 		 */
 		m->m_flags &= ~M_FASTFWD_OURS;
 		/* Set up some basics that will be used later. */
 		ip = mtod(m, struct ip *);
 		hlen = ip->ip_hl << 2;
 		goto ours;
 	}
 
 	IPSTAT_INC(ips_total);
 
 	if (m->m_pkthdr.len < sizeof(struct ip))
 		goto tooshort;
 
 	if (m->m_len < sizeof (struct ip) &&
 	    (m = m_pullup(m, sizeof (struct ip))) == NULL) {
 		IPSTAT_INC(ips_toosmall);
 		return;
 	}
 	ip = mtod(m, struct ip *);
 
 	if (ip->ip_v != IPVERSION) {
 		IPSTAT_INC(ips_badvers);
 		goto bad;
 	}
 
 	hlen = ip->ip_hl << 2;
 	if (hlen < sizeof(struct ip)) {	/* minimum header length */
 		IPSTAT_INC(ips_badhlen);
 		goto bad;
 	}
 	if (hlen > m->m_len) {
 		if ((m = m_pullup(m, hlen)) == NULL) {
 			IPSTAT_INC(ips_badhlen);
 			return;
 		}
 		ip = mtod(m, struct ip *);
 	}
 
 	/* 127/8 must not appear on wire - RFC1122 */
 	ifp = m->m_pkthdr.rcvif;
 	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
 	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
 		if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
 			IPSTAT_INC(ips_badaddr);
 			goto bad;
 		}
 	}
 
 	/* Use the hardware checksum result when available. */
 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
 	} else {
 		if (hlen == sizeof(struct ip)) {
 			sum = in_cksum_hdr(ip);
 		} else {
 			sum = in_cksum(m, hlen);
 		}
 	}
 	if (sum) {
 		IPSTAT_INC(ips_badsum);
 		goto bad;
 	}
 
 #ifdef ALTQ
 	if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0)
 		/* packet is dropped by traffic conditioner */
 		return;
 #endif
 
 	/*
 	 * Convert fields to host representation.
 	 */
 	ip->ip_len = ntohs(ip->ip_len);
 	if (ip->ip_len < hlen) {
 		IPSTAT_INC(ips_badlen);
 		goto bad;
 	}
 	ip->ip_off = ntohs(ip->ip_off);
 
 	/*
 	 * Check that the amount of data in the buffers
 	 * is at least as much as the IP header would have us expect.
 	 * Trim mbufs if longer than we expect.
 	 * Drop packet if shorter than we expect.
 	 */
 	if (m->m_pkthdr.len < ip->ip_len) {
 tooshort:
 		IPSTAT_INC(ips_tooshort);
 		goto bad;
 	}
 	if (m->m_pkthdr.len > ip->ip_len) {
 		if (m->m_len == m->m_pkthdr.len) {
 			m->m_len = ip->ip_len;
 			m->m_pkthdr.len = ip->ip_len;
 		} else
 			/* Negative length: trim from the tail of the chain. */
 			m_adj(m, ip->ip_len - m->m_pkthdr.len);
 	}
 #ifdef IPSEC
 	/*
 	 * Bypass packet filtering for packets from a tunnel (gif).
 	 */
 	if (ip_ipsec_filtertunnel(m))
 		goto passin;
 #endif /* IPSEC */
 
 	/*
 	 * Run through list of hooks for input packets.
 	 *
 	 * NB: Beware of the destination address changing (e.g.
 	 *     by NAT rewriting).  When this happens, tell
 	 *     ip_forward to do the right thing.
 	 */
 
 	/* Jump over all PFIL processing if hooks are not active. */
 	if (!PFIL_HOOKED(&inet_pfil_hook))
 		goto passin;
 
 	odst = ip->ip_dst;
 	if (pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_IN, NULL) != 0)
 		return;
 	if (m == NULL)			/* consumed by filter */
 		return;
 
 	/* The hooks may have replaced the mbuf; reload derived state. */
 	ip = mtod(m, struct ip *);
 	dchg = (odst.s_addr != ip->ip_dst.s_addr);
 	ifp = m->m_pkthdr.rcvif;
 
 #ifdef IPFIREWALL_FORWARD
 	if (m->m_flags & M_FASTFWD_OURS) {
 		m->m_flags &= ~M_FASTFWD_OURS;
 		goto ours;
 	}
 	if ((dchg = (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL)) != 0) {
 		/*
 		 * Directly ship on the packet.  This allows to forward packets
 		 * that were destined for us to some other directly connected
 		 * host.
 		 */
 		ip_forward(m, dchg);
 		return;
 	}
 #endif /* IPFIREWALL_FORWARD */
 
 passin:
 	/*
 	 * Process options and, if not destined for us,
 	 * ship it on.  ip_dooptions returns 1 when an
 	 * error was detected (causing an icmp message
 	 * to be sent and the original packet to be freed).
 	 */
 	if (hlen > sizeof (struct ip) && ip_dooptions(m, 0))
 		return;
 
 	/*
 	 * greedy RSVP, snatches any PATH packet of the RSVP protocol and no
 	 * matter if it is destined to another node, or whether it is
 	 * a multicast one, RSVP wants it! and prevents it from being forwarded
 	 * anywhere else. Also checks if the rsvp daemon is running before
 	 * grabbing the packet.
 	 */
 	if (V_rsvp_on && ip->ip_p==IPPROTO_RSVP) 
 		goto ours;
 
 	/*
 	 * Check our list of addresses, to see if the packet is for us.
 	 * If we don't have any addresses, assume any unicast packet
 	 * we receive might be for us (and let the upper layers deal
 	 * with it).
 	 */
 	if (TAILQ_EMPTY(&V_in_ifaddrhead) &&
 	    (m->m_flags & (M_MCAST|M_BCAST)) == 0)
 		goto ours;
 
 	/*
 	 * Enable a consistency check between the destination address
 	 * and the arrival interface for a unicast packet (the RFC 1122
 	 * strong ES model) if IP forwarding is disabled and the packet
 	 * is not locally generated and the packet is not subject to
 	 * 'ipfw fwd'.
 	 *
 	 * XXX - Checking also should be disabled if the destination
 	 * address is ipnat'ed to a different interface.
 	 *
 	 * XXX - Checking is incompatible with IP aliases added
 	 * to the loopback interface instead of the interface where
 	 * the packets are received.
 	 *
 	 * XXX - This is the case for carp vhost IPs as well so we
 	 * insert a workaround. If the packet got here, we already
 	 * checked with carp_iamatch() and carp_forus().
 	 */
 	checkif = V_ip_checkinterface && (V_ipforwarding == 0) && 
 	    ifp != NULL && ((ifp->if_flags & IFF_LOOPBACK) == 0) &&
 #ifdef DEV_CARP
 	    !ifp->if_carp &&
 #endif
 	    (dchg == 0);
 
 	/*
 	 * Check for exact addresses in the hash bucket.
 	 */
 	LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) {
 		/*
 		 * If the address matches, verify that the packet
 		 * arrived via the correct interface if checking is
 		 * enabled.
 		 */
 		if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr && 
 		    (!checkif || ia->ia_ifp == ifp))
 			goto ours;
 	}
 	/*
 	 * Check for broadcast addresses.
 	 *
 	 * Only accept broadcast packets that arrive via the matching
 	 * interface.  Reception of forwarded directed broadcasts would
 	 * be handled via ip_forward() and ether_output() with the loopback
 	 * into the stack for SIMPLEX interfaces handled by ether_output().
 	 */
 	if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) {
 		IF_ADDR_LOCK(ifp);
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family != AF_INET)
 				continue;
 			ia = ifatoia(ifa);
 			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
 			    ip->ip_dst.s_addr) {
 				IF_ADDR_UNLOCK(ifp);
 				goto ours;
 			}
 			if (ia->ia_netbroadcast.s_addr == ip->ip_dst.s_addr) {
 				IF_ADDR_UNLOCK(ifp);
 				goto ours;
 			}
 #ifdef BOOTP_COMPAT
 			if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) {
 				IF_ADDR_UNLOCK(ifp);
 				goto ours;
 			}
 #endif
 		}
 		IF_ADDR_UNLOCK(ifp);
 		/*
 		 * No broadcast address matched.  Forget the last address
 		 * examined so that the later "goto ours" cases (multicast,
 		 * limited broadcast, INADDR_ANY, FAITH) do not charge the
 		 * per-address counters to an unrelated address.
 		 */
 		ia = NULL;
 	}
 	/* RFC 3927 2.7: Do not forward datagrams for 169.254.0.0/16. */
 	if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) {
 		IPSTAT_INC(ips_cantforward);
 		m_freem(m);
 		return;
 	}
 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
 		if (V_ip_mrouter) {
 			/*
 			 * If we are acting as a multicast router, all
 			 * incoming multicast packets are passed to the
 			 * kernel-level multicast forwarding function.
 			 * The packet is returned (relatively) intact; if
 			 * ip_mforward() returns a non-zero value, the packet
 			 * must be discarded, else it may be accepted below.
 			 */
 			if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) {
 				IPSTAT_INC(ips_cantforward);
 				m_freem(m);
 				return;
 			}
 
 			/*
 			 * The process-level routing daemon needs to receive
 			 * all multicast IGMP packets, whether or not this
 			 * host belongs to their destination groups.
 			 */
 			if (ip->ip_p == IPPROTO_IGMP)
 				goto ours;
 			IPSTAT_INC(ips_forward);
 		}
 		/*
 		 * Assume the packet is for us, to avoid prematurely taking
 		 * a lock on the in_multi hash. Protocols must perform
 		 * their own filtering and update statistics accordingly.
 		 */
 		goto ours;
 	}
 	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
 		goto ours;
 	if (ip->ip_dst.s_addr == INADDR_ANY)
 		goto ours;
 
 	/*
 	 * FAITH(Firewall Aided Internet Translator)
 	 */
 	if (ifp && ifp->if_type == IFT_FAITH) {
 		if (V_ip_keepfaith) {
 			if (ip->ip_p == IPPROTO_TCP || ip->ip_p == IPPROTO_ICMP) 
 				goto ours;
 		}
 		m_freem(m);
 		return;
 	}
 
 	/*
 	 * Not for us; forward if possible and desirable.
 	 */
 	if (V_ipforwarding == 0) {
 		IPSTAT_INC(ips_cantforward);
 		m_freem(m);
 	} else {
 #ifdef IPSEC
 		if (ip_ipsec_fwd(m))
 			goto bad;
 #endif /* IPSEC */
 		ip_forward(m, dchg);
 	}
 	return;
 
 ours:
 #ifdef IPSTEALTH
 	/*
 	 * IPSTEALTH: Process non-routing options only
 	 * if the packet is destined for us.
 	 */
 	if (V_ipstealth && hlen > sizeof (struct ip) &&
 	    ip_dooptions(m, 1))
 		return;
 #endif /* IPSTEALTH */
 
 	/* Count the packet in the ip address stats */
 	if (ia != NULL) {
 		ia->ia_ifa.if_ipackets++;
 		ia->ia_ifa.if_ibytes += m->m_pkthdr.len;
 	}
 
 	/*
 	 * Attempt reassembly; if it succeeds, proceed.
 	 * ip_reass() will return a different mbuf.
 	 */
 	if (ip->ip_off & (IP_MF | IP_OFFMASK)) {
 		m = ip_reass(m);
 		if (m == NULL)
 			return;
 		ip = mtod(m, struct ip *);
 		/* Get the header length of the reassembled packet */
 		hlen = ip->ip_hl << 2;
 	}
 
 	/*
 	 * Further protocols expect the packet length to be w/o the
 	 * IP header.
 	 */
 	ip->ip_len -= hlen;
 
 #ifdef IPSEC
 	/*
 	 * enforce IPsec policy checking if we are seeing last header.
 	 * note that we do not visit this with protocols with pcb layer
 	 * code - like udp/tcp/raw ip.
 	 */
 	if (ip_ipsec_input(m))
 		goto bad;
 #endif /* IPSEC */
 
 	/*
 	 * Switch out to protocol's input routine.
 	 */
 	IPSTAT_INC(ips_delivered);
 
 	(*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen);
 	return;
 bad:
 	m_freem(m);
 }
 
 /*
  * Propagate the current value of maxnipq to the UMA zone limit.  The zone
  * maximum does not use the same encoding as the sysctl (for historical
  * reasons), so the value is translated here:
  *
  *   maxnipq < 0   unlimited allocation       -> zone max 0
  *   maxnipq > 0   specific bound             -> zone max maxnipq
  *   maxnipq == 0  no further fragment queues -> zone max 1; the bound is
  *                 merely set very low, and code elsewhere is relied upon
  *                 to prevent allocation and reclaim current queues.
  */
 static void
 maxnipq_update(void)
 {
 	INIT_VNET_INET(curvnet);
 
 	if (V_maxnipq < 0)
 		uma_zone_set_max(V_ipq_zone, 0);
 	else if (V_maxnipq > 0)
 		uma_zone_set_max(V_ipq_zone, V_maxnipq);
 	else
 		uma_zone_set_max(V_ipq_zone, 1);
 }
 
 /*
  * nmbclusters_change event handler (registered by ip_init()): raise a
  * positive maxnipq to nmbclusters / 32 when it is currently below that
  * value, and push the new limit to the zone.
  */
 static void
 ipq_zone_change(void *tag)
 {
 	INIT_VNET_INET(curvnet);
 
 	/* Unlimited (< 0) and disabled (== 0) settings are left alone. */
 	if (V_maxnipq <= 0)
 		return;
 	/* The limit is only ever raised here, never lowered. */
 	if (V_maxnipq >= (nmbclusters / 32))
 		return;
 
 	V_maxnipq = nmbclusters / 32;
 	maxnipq_update();
 }
 
 static int
 sysctl_maxnipq(SYSCTL_HANDLER_ARGS)
 {
 	INIT_VNET_INET(curvnet);
 	int error, i;
 
 	i = V_maxnipq;
 	error = sysctl_handle_int(oidp, &i, 0, req);
 	if (error || !req->newptr)
 		return (error);
 
 	/*
 	 * XXXRW: Might be a good idea to sanity check the argument and place
 	 * an extreme upper bound.
 	 */
 	if (i < -1)
 		return (EINVAL);
 	V_maxnipq = i;
 	maxnipq_update();
 	return (0);
 }
 
 SYSCTL_PROC(_net_inet_ip, OID_AUTO, maxfragpackets, CTLTYPE_INT|CTLFLAG_RW,
     NULL, 0, sysctl_maxnipq, "I",
     "Maximum number of IPv4 fragment reassembly queue entries");
 
 /*
  * Take incoming datagram fragment and try to reassemble it into
  * whole datagram.  If the argument is the first fragment or one
  * in between the function will return NULL and store the mbuf
  * in the fragment chain.  If the argument is the last fragment
  * the packet will be reassembled and the pointer to the new
  * mbuf returned for further processing.  Only m_tags attached
  * to the first packet/fragment are preserved.
  * The IP header is *NOT* adjusted out of iplen.
  *
  * NULL is also returned when the fragment is dropped.  In every case
  * the mbuf passed in is queued, freed, or folded into the returned
  * chain, so the caller must not use it afterwards.  The reassembly
  * queue lock is acquired and released inside this function.
  */
 struct mbuf *
 ip_reass(struct mbuf *m)
 {
 	INIT_VNET_INET(curvnet);
 	struct ip *ip;
 	struct mbuf *p, *q, *nq, *t;
 	struct ipq *fp = NULL;
 	struct ipqhead *head;
 	int i, hlen, next;
 	u_int8_t ecn, ecn0;
 	u_short hash;
 
 	/* If maxnipq or maxfragsperpacket are 0, never accept fragments. */
 	if (V_maxnipq == 0 || V_maxfragsperpacket == 0) {
 		IPSTAT_INC(ips_fragments);
 		IPSTAT_INC(ips_fragdropped);
 		m_freem(m);
 		return (NULL);
 	}
 
 	ip = mtod(m, struct ip *);
 	hlen = ip->ip_hl << 2;
 
 	hash = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
 	head = &V_ipq[hash];
 	IPQ_LOCK();
 
 	/*
 	 * Look for queue of fragments
 	 * of this datagram.
 	 */
 	TAILQ_FOREACH(fp, head, ipq_list)
 		if (ip->ip_id == fp->ipq_id &&
 		    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
 		    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
 #ifdef MAC
 		    mac_ipq_match(m, fp) &&
 #endif
 		    ip->ip_p == fp->ipq_p)
 			goto found;
 
 	fp = NULL;
 
 	/*
 	 * Attempt to trim the number of allocated fragment queues if it
 	 * exceeds the administrative limit.
 	 */
 	if ((V_nipq > V_maxnipq) && (V_maxnipq > 0)) {
 		/*
 		 * drop something from the tail of the current queue
 		 * before proceeding further
 		 */
 		struct ipq *victim = TAILQ_LAST(head, ipqhead);
 		if (victim == NULL) {   /* gak */
 			/* Our bucket is empty: evict from the first non-empty one. */
 			for (i = 0; i < IPREASS_NHASH; i++) {
 				struct ipq *r = TAILQ_LAST(&V_ipq[i], ipqhead);
 				if (r) {
 					IPSTAT_ADD(ips_fragtimeout,
 					    r->ipq_nfrags);
 					ip_freef(&V_ipq[i], r);
 					break;
 				}
 			}
 		} else {
 			IPSTAT_ADD(ips_fragtimeout, victim->ipq_nfrags);
 			ip_freef(head, victim);
 		}
 	}
 
 found:
 	/*
 	 * Adjust ip_len to not reflect header,
 	 * convert offset of this to bytes.
 	 */
 	ip->ip_len -= hlen;
 	if (ip->ip_off & IP_MF) {
 		/*
 		 * Make sure that fragments have a data length
 		 * that's a non-zero multiple of 8 bytes.
 		 */
 		if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) {
 			IPSTAT_INC(ips_toosmall); /* XXX */
 			/*
 			 * This fragment has not been added to
 			 * fp->ipq_nfrags yet, so it must not go through
 			 * dropfrag, which would decrement the counter of
 			 * a matched queue and corrupt its accounting.
 			 */
 			IPSTAT_INC(ips_fragdropped);
 			m_freem(m);
 			goto done;
 		}
 		m->m_flags |= M_FRAG;
 	} else
 		m->m_flags &= ~M_FRAG;
 	ip->ip_off <<= 3;
 
 
 	/*
 	 * Attempt reassembly; if it succeeds, proceed.
 	 * ip_reass() will return a different mbuf.
 	 */
 	IPSTAT_INC(ips_fragments);
 	m->m_pkthdr.header = ip;
 
 	/* Previous ip_reass() started here. */
 	/*
 	 * Presence of header sizes in mbufs
 	 * would confuse code below.
 	 */
 	m->m_data += hlen;
 	m->m_len -= hlen;
 
 	/*
 	 * If first fragment to arrive, create a reassembly queue.
 	 */
 	if (fp == NULL) {
 		fp = uma_zalloc(V_ipq_zone, M_NOWAIT);
 		if (fp == NULL)
 			goto dropfrag;
 #ifdef MAC
 		if (mac_ipq_init(fp, M_NOWAIT) != 0) {
 			uma_zfree(V_ipq_zone, fp);
 			fp = NULL;
 			goto dropfrag;
 		}
 		mac_ipq_create(m, fp);
 #endif
 		TAILQ_INSERT_HEAD(head, fp, ipq_list);
 		V_nipq++;
 		fp->ipq_nfrags = 1;
 		fp->ipq_ttl = IPFRAGTTL;
 		fp->ipq_p = ip->ip_p;
 		fp->ipq_id = ip->ip_id;
 		fp->ipq_src = ip->ip_src;
 		fp->ipq_dst = ip->ip_dst;
 		fp->ipq_frags = m;
 		m->m_nextpkt = NULL;
 		goto done;
 	} else {
 		/* From here on dropfrag must undo this increment. */
 		fp->ipq_nfrags++;
 #ifdef MAC
 		mac_ipq_update(m, fp);
 #endif
 	}
 
 #define GETIP(m)	((struct ip*)((m)->m_pkthdr.header))
 
 	/*
 	 * Handle ECN by comparing this segment with the first one;
 	 * if CE is set, do not lose CE.
 	 * drop if CE and not-ECT are mixed for the same packet.
 	 */
 	ecn = ip->ip_tos & IPTOS_ECN_MASK;
 	ecn0 = GETIP(fp->ipq_frags)->ip_tos & IPTOS_ECN_MASK;
 	if (ecn == IPTOS_ECN_CE) {
 		if (ecn0 == IPTOS_ECN_NOTECT)
 			goto dropfrag;
 		if (ecn0 != IPTOS_ECN_CE)
 			GETIP(fp->ipq_frags)->ip_tos |= IPTOS_ECN_CE;
 	}
 	if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT)
 		goto dropfrag;
 
 	/*
 	 * Find a segment which begins after this one does.
 	 */
 	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt)
 		if (GETIP(q)->ip_off > ip->ip_off)
 			break;
 
 	/*
 	 * If there is a preceding segment, it may provide some of
 	 * our data already.  If so, drop the data from the incoming
 	 * segment.  If it provides all of our data, drop us, otherwise
 	 * stick new segment in the proper place.
 	 *
 	 * If some of the data is dropped from the preceding
 	 * segment, then its checksum is invalidated.
 	 */
 	if (p) {
 		i = GETIP(p)->ip_off + GETIP(p)->ip_len - ip->ip_off;
 		if (i > 0) {
 			if (i >= ip->ip_len)
 				goto dropfrag;
 			m_adj(m, i);
 			m->m_pkthdr.csum_flags = 0;
 			ip->ip_off += i;
 			ip->ip_len -= i;
 		}
 		m->m_nextpkt = p->m_nextpkt;
 		p->m_nextpkt = m;
 	} else {
 		m->m_nextpkt = fp->ipq_frags;
 		fp->ipq_frags = m;
 	}
 
 	/*
 	 * While we overlap succeeding segments trim them or,
 	 * if they are completely covered, dequeue them.
 	 */
 	for (; q != NULL && ip->ip_off + ip->ip_len > GETIP(q)->ip_off;
 	     q = nq) {
 		i = (ip->ip_off + ip->ip_len) - GETIP(q)->ip_off;
 		if (i < GETIP(q)->ip_len) {
 			GETIP(q)->ip_len -= i;
 			GETIP(q)->ip_off += i;
 			m_adj(q, i);
 			q->m_pkthdr.csum_flags = 0;
 			break;
 		}
 		nq = q->m_nextpkt;
 		m->m_nextpkt = nq;
 		IPSTAT_INC(ips_fragdropped);
 		fp->ipq_nfrags--;
 		m_freem(q);
 	}
 
 	/*
 	 * Check for complete reassembly and perform frag per packet
 	 * limiting.
 	 *
 	 * Frag limiting is performed here so that the nth frag has
 	 * a chance to complete the packet before we drop the packet.
 	 * As a result, n+1 frags are actually allowed per packet, but
 	 * only n will ever be stored. (n = maxfragsperpacket.)
 	 *
 	 */
 	next = 0;
 	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) {
 		if (GETIP(q)->ip_off != next) {
 			/* Hole in the data: not complete yet. */
 			if (fp->ipq_nfrags > V_maxfragsperpacket) {
 				IPSTAT_ADD(ips_fragdropped, fp->ipq_nfrags);
 				ip_freef(head, fp);
 			}
 			goto done;
 		}
 		next += GETIP(q)->ip_len;
 	}
 	/* Make sure the last packet didn't have the IP_MF flag */
 	if (p->m_flags & M_FRAG) {
 		if (fp->ipq_nfrags > V_maxfragsperpacket) {
 			IPSTAT_ADD(ips_fragdropped, fp->ipq_nfrags);
 			ip_freef(head, fp);
 		}
 		goto done;
 	}
 
 	/*
 	 * Reassembly is complete.  Make sure the packet is a sane size.
 	 */
 	q = fp->ipq_frags;
 	ip = GETIP(q);
 	if (next + (ip->ip_hl << 2) > IP_MAXPACKET) {
 		IPSTAT_INC(ips_toolong);
 		IPSTAT_ADD(ips_fragdropped, fp->ipq_nfrags);
 		ip_freef(head, fp);
 		goto done;
 	}
 
 	/*
 	 * Concatenate fragments.
 	 */
 	m = q;
 	t = m->m_next;
 	m->m_next = NULL;
 	m_cat(m, t);
 	nq = q->m_nextpkt;
 	q->m_nextpkt = NULL;
 	for (q = nq; q != NULL; q = nq) {
 		nq = q->m_nextpkt;
 		q->m_nextpkt = NULL;
 		m->m_pkthdr.csum_flags &= q->m_pkthdr.csum_flags;
 		m->m_pkthdr.csum_data += q->m_pkthdr.csum_data;
 		m_cat(m, q);
 	}
 	/*
 	 * In order to do checksumming faster we do 'end-around carry' here
 	 * (and not in for{} loop), though it implies we are not going to
 	 * reassemble more than 64k fragments.
 	 */
 	m->m_pkthdr.csum_data =
 	    (m->m_pkthdr.csum_data & 0xffff) + (m->m_pkthdr.csum_data >> 16);
 #ifdef MAC
 	mac_ipq_reassemble(fp, m);
 	mac_ipq_destroy(fp);
 #endif
 
 	/*
 	 * Create header for new ip packet by modifying header of first
 	 * packet;  dequeue and discard fragment reassembly header.
 	 * Make header visible.
 	 */
 	ip->ip_len = (ip->ip_hl << 2) + next;
 	ip->ip_src = fp->ipq_src;
 	ip->ip_dst = fp->ipq_dst;
 	TAILQ_REMOVE(head, fp, ipq_list);
 	V_nipq--;
 	uma_zfree(V_ipq_zone, fp);
 	m->m_len += (ip->ip_hl << 2);
 	m->m_data -= (ip->ip_hl << 2);
 	/* some debugging cruft by sklower, below, will go away soon */
 	if (m->m_flags & M_PKTHDR)	/* XXX this should be done elsewhere */
 		m_fixhdr(m);
 	IPSTAT_INC(ips_reassembled);
 	IPQ_UNLOCK();
 	return (m);
 
 dropfrag:
 	/*
 	 * Drop a fragment that was already counted in fp->ipq_nfrags
 	 * (or that has no queue at all, in which case fp is NULL).
 	 */
 	IPSTAT_INC(ips_fragdropped);
 	if (fp != NULL)
 		fp->ipq_nfrags--;
 	m_freem(m);
 done:
 	IPQ_UNLOCK();
 	return (NULL);
 
 #undef GETIP
 }
 
 /*
  * Free a fragment reassembly header and all
  * associated datagrams.
  */
 static void
 ip_freef(struct ipqhead *fhp, struct ipq *fp)
 {
 	INIT_VNET_INET(curvnet);
 	struct mbuf *q;
 
 	IPQ_LOCK_ASSERT();
 
 	while (fp->ipq_frags) {
 		q = fp->ipq_frags;
 		fp->ipq_frags = q->m_nextpkt;
 		m_freem(q);
 	}
 	TAILQ_REMOVE(fhp, fp, ipq_list);
 	uma_zfree(V_ipq_zone, fp);
 	V_nipq--;
 }
 
 /*
  * Slow timer for IP reassembly.  For every vnet, age each reassembly
  * queue by one tick and discard those whose time-to-live reaches zero;
  * afterwards, if the queue count exceeds a non-negative limit, drain
  * queues bucket by bucket until it no longer does.
  */
 void
 ip_slowtimo(void)
 {
 	VNET_ITERATOR_DECL(vnet_iter);
 	struct ipq *cur, *following;
 	int bucket;
 
 	IPQ_LOCK();
 	VNET_LIST_RLOCK();
 	VNET_FOREACH(vnet_iter) {
 		CURVNET_SET(vnet_iter);
 		INIT_VNET_INET(vnet_iter);
 
 		/* Pass 1: expire queues whose ttl has run out. */
 		for (bucket = 0; bucket < IPREASS_NHASH; bucket++) {
 			cur = TAILQ_FIRST(&V_ipq[bucket]);
 			while (cur != NULL) {
 				/* Fetch the successor first; cur may be freed. */
 				following = TAILQ_NEXT(cur, ipq_list);
 				if (--cur->ipq_ttl == 0) {
 					IPSTAT_ADD(ips_fragtimeout,
 					    cur->ipq_nfrags);
 					ip_freef(&V_ipq[bucket], cur);
 				}
 				cur = following;
 			}
 		}
 
 		/*
 		 * Pass 2: the limit may have been lowered since these
 		 * queues were created; shed queues until we are back at
 		 * or under it.
 		 */
 		if (V_maxnipq >= 0 && V_nipq > V_maxnipq) {
 			for (bucket = 0; bucket < IPREASS_NHASH; bucket++) {
 				while (V_nipq > V_maxnipq) {
 					cur = TAILQ_FIRST(&V_ipq[bucket]);
 					if (cur == NULL)
 						break;
 					IPSTAT_ADD(ips_fragdropped,
 					    cur->ipq_nfrags);
 					ip_freef(&V_ipq[bucket], cur);
 				}
 			}
 		}
 		CURVNET_RESTORE();
 	}
 	VNET_LIST_RUNLOCK();
 	IPQ_UNLOCK();
 }
 
 /*
  * Discard every pending reassembly queue in every vnet, counting the
  * queued fragments as dropped, then call in_rtqdrain().
  */
 void
 ip_drain(void)
 {
 	VNET_ITERATOR_DECL(vnet_iter);
 	struct ipq *victim;
 	int bucket;
 
 	IPQ_LOCK();
 	VNET_LIST_RLOCK();
 	VNET_FOREACH(vnet_iter) {
 		CURVNET_SET(vnet_iter);
 		INIT_VNET_INET(vnet_iter);
 		for (bucket = 0; bucket < IPREASS_NHASH; bucket++) {
 			/* Keep freeing the head until the bucket is empty. */
 			while ((victim = TAILQ_FIRST(&V_ipq[bucket])) != NULL) {
 				IPSTAT_ADD(ips_fragdropped,
 				    victim->ipq_nfrags);
 				ip_freef(&V_ipq[bucket], victim);
 			}
 		}
 		CURVNET_RESTORE();
 	}
 	VNET_LIST_RUNLOCK();
 	IPQ_UNLOCK();
 	in_rtqdrain();
 }
 
 /*
  * Point ip_protox[ipproto] at the matching inetsw[] entry.
  *
  * The protocol to be inserted into ip_protox[] must be already registered
  * in inetsw[], either statically or through pf_proto_register().
  *
  * Returns 0 on success, EPROTONOSUPPORT for protocol 0 or when no
  * matching inetsw[] entry exists, EPFNOSUPPORT when the raw IP entry
  * cannot be found, EEXIST when the slot is already taken, and EINVAL
  * when the matching entry's protocol number is out of range.
  */
 int
 ipproto_register(u_char ipproto)
 {
 	struct protosw *raw, *cand;
 
 	/* Sanity checks. */
 	if (ipproto == 0)
 		return (EPROTONOSUPPORT);
 
 	/*
 	 * The protocol slot must not be occupied by another protocol
 	 * already.  An index pointing to IPPROTO_RAW is unused.
 	 */
 	raw = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
 	if (raw == NULL)
 		return (EPFNOSUPPORT);
 	if (ip_protox[ipproto] != raw - inetsw)	/* IPPROTO_RAW */
 		return (EEXIST);
 
 	/* Find the protocol position in inetsw[] and set the index. */
 	for (cand = inetdomain.dom_protosw;
 	     cand < inetdomain.dom_protoswNPROTOSW; cand++) {
 		if (cand->pr_domain->dom_family != PF_INET)
 			continue;
 		if (cand->pr_protocol == 0 || cand->pr_protocol != ipproto)
 			continue;
 
 		/* Be careful to only index valid IP protocols. */
 		if (cand->pr_protocol >= IPPROTO_MAX)
 			return (EINVAL);
 		ip_protox[cand->pr_protocol] = cand - inetsw;
 		return (0);
 	}
 	return (EPROTONOSUPPORT);
 }
 
 /*
  * Undo ipproto_register(): reset ip_protox[ipproto] so that it refers
  * to the raw IP entry again.
  *
  * Returns 0 on success, EPROTONOSUPPORT for protocol 0, EPFNOSUPPORT
  * when the raw IP entry cannot be found, and ENOENT when the slot
  * already refers to the raw entry (i.e. nothing was registered).
  */
 int
 ipproto_unregister(u_char ipproto)
 {
 	struct protosw *raw;
 
 	/* Sanity checks. */
 	if (ipproto == 0)
 		return (EPROTONOSUPPORT);
 
 	raw = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
 	if (raw == NULL)
 		return (EPFNOSUPPORT);
 
 	/* A slot that still indexes IPPROTO_RAW was never registered. */
 	if (ip_protox[ipproto] == raw - inetsw)
 		return (ENOENT);
 
 	/* Reset the protocol slot to IPPROTO_RAW. */
 	ip_protox[ipproto] = raw - inetsw;
 	return (0);
 }
 
 /*
  * Look up the route towards dst (final destination or next hop) in the
  * given FIB and return the internet address info of the interface that
  * route uses, or NULL when no route is found.  The route reference
  * taken for the lookup is released before returning.
  */
 struct in_ifaddr *
 ip_rtaddr(struct in_addr dst, u_int fibnum)
 {
 	struct route ro;
 	struct sockaddr_in *dstsin;
 	struct in_ifaddr *ia;
 
 	/* Build a throw-away route request for dst. */
 	bzero(&ro, sizeof(ro));
 	dstsin = (struct sockaddr_in *)&ro.ro_dst;
 	dstsin->sin_len = sizeof(*dstsin);
 	dstsin->sin_family = AF_INET;
 	dstsin->sin_addr = dst;
 	in_rtalloc_ign(&ro, 0, fibnum);
 
 	ia = NULL;
 	if (ro.ro_rt != NULL) {
 		ia = ifatoia(ro.ro_rt->rt_ifa);
 		RTFREE(ro.ro_rt);
 	}
 	return (ia);
 }
 
 /*
  * Table of PRC_NCMDS errno values, four per row; a 0 entry means no
  * error is associated with that slot.
  * NOTE(review): presumably indexed by the PRC_* control command codes;
  * confirm against the PRC_* definitions before editing.
  */
 u_char inetctlerrmap[PRC_NCMDS] = {
 	0,		0,		0,		0,
 	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
 	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
 	EMSGSIZE,	EHOSTUNREACH,	0,		0,
 	0,		0,		EHOSTUNREACH,	0,
 	ENOPROTOOPT,	ECONNREFUSED
 };
 
 /*
  * Forward a packet.  If some error occurs return the sender
  * an icmp packet.  Note we can't always generate a meaningful
  * icmp message because icmp doesn't have a large enough repertoire
  * of codes and types.
  *
  * If not forwarding, just drop the packet.  This could be confusing
  * if ipforwarding was zero but some routing protocol was advancing
  * us as a gateway to somewhere.  However, we must let the routing
  * protocol deal with that.
  *
  * The srcrt parameter indicates whether the packet is being forwarded
  * via a source route.
  *
  * The mbuf is never handed back to the caller: on every path it is
  * freed here or passed on to icmp_error() or ip_output().
  */
 void
 ip_forward(struct mbuf *m, int srcrt)
 {
 	INIT_VNET_INET(curvnet);
 	struct ip *ip = mtod(m, struct ip *);
 	struct in_ifaddr *ia;
 	struct mbuf *mcopy;
 	struct in_addr dest;
 	struct route ro;
 	int error, type = 0, code = 0, mtu = 0;
 
 	/* Link-level broadcast/multicast and unforwardable destinations. */
 	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
 		IPSTAT_INC(ips_cantforward);
 		m_freem(m);
 		return;
 	}
 	/* The TTL check is skipped when stealth forwarding is enabled. */
 #ifdef IPSTEALTH
 	if (!V_ipstealth) {
 #endif
 		if (ip->ip_ttl <= IPTTLDEC) {
 			icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS,
 			    0, 0);
 			return;
 		}
 #ifdef IPSTEALTH
 	}
 #endif
 
 	ia = ip_rtaddr(ip->ip_dst, M_GETFIB(m));
 #ifndef IPSEC
 	/*
 	 * 'ia' may be NULL if there is no route for this destination.
 	 * In case of IPsec, Don't discard it just yet, but pass it to
 	 * ip_output in case of outgoing IPsec policy.
 	 */
 	if (!srcrt && ia == NULL) {
 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
 		return;
 	}
 #endif
 
 	/*
 	 * Save the IP header and at most 8 bytes of the payload,
 	 * in case we need to generate an ICMP message to the src.
 	 *
 	 * XXX this can be optimized a lot by saving the data in a local
 	 * buffer on the stack (72 bytes at most), and only allocating the
 	 * mbuf if really necessary. The vast majority of the packets
 	 * are forwarded without having to send an ICMP back (either
 	 * because unnecessary, or because rate limited), so we are
 	 * really wasting a lot of work here.
 	 *
 	 * We don't use m_copy() because it might return a reference
 	 * to a shared cluster. Both this function and ip_output()
 	 * assume exclusive access to the IP header in `m', so any
 	 * data in a cluster may change before we reach icmp_error().
 	 */
 	MGETHDR(mcopy, M_DONTWAIT, m->m_type);
 	if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_DONTWAIT)) {
 		/*
 		 * It's probably ok if the pkthdr dup fails (because
 		 * the deep copy of the tag chain failed), but for now
 		 * be conservative and just discard the copy since
 		 * code below may some day want the tags.
 		 */
 		m_free(mcopy);
 		mcopy = NULL;
 	}
 	if (mcopy != NULL) {
 		/*
 		 * The copy length is bounded by the packet length and by
 		 * the space available in the single mbuf just allocated.
 		 */
 		mcopy->m_len = min(ip->ip_len, M_TRAILINGSPACE(mcopy));
 		mcopy->m_pkthdr.len = mcopy->m_len;
 		m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
 	}
 
 	/* The copy above was taken before the TTL is decremented. */
 #ifdef IPSTEALTH
 	if (!V_ipstealth) {
 #endif
 		ip->ip_ttl -= IPTTLDEC;
 #ifdef IPSTEALTH
 	}
 #endif
 
 	/*
 	 * If forwarding packet using same interface that it came in on,
 	 * perhaps should send a redirect to sender to shortcut a hop.
 	 * Only send redirect if source is sending directly to us,
 	 * and if packet was not source routed (or has any options).
 	 * Also, don't send redirect if forwarding using a default route
 	 * or a route modified by a redirect.
 	 */
 	dest.s_addr = 0;
 	if (!srcrt && V_ipsendredirects &&
 	    ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) {
 		struct sockaddr_in *sin;
 		struct rtentry *rt;
 
 		bzero(&ro, sizeof(ro));
 		sin = (struct sockaddr_in *)&ro.ro_dst;
 		sin->sin_family = AF_INET;
 		sin->sin_len = sizeof(*sin);
 		sin->sin_addr = ip->ip_dst;
 		in_rtalloc_ign(&ro, 0, M_GETFIB(m));
 
 		rt = ro.ro_rt;
 
 		/* A route key of 0 is the default route: no redirect. */
 		if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
 		    satosin(rt_key(rt))->sin_addr.s_addr != 0) {
 #define	RTA(rt)	((struct in_ifaddr *)(rt->rt_ifa))
 			u_long src = ntohl(ip->ip_src.s_addr);
 
 			/* Redirect only if the source is on the route's subnet. */
 			if (RTA(rt) &&
 			    (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
 				if (rt->rt_flags & RTF_GATEWAY)
 					dest.s_addr = satosin(rt->rt_gateway)->sin_addr.s_addr;
 				else
 					dest.s_addr = ip->ip_dst.s_addr;
 				/* Router requirements says to only send host redirects */
 				type = ICMP_REDIRECT;
 				code = ICMP_REDIRECT_HOST;
 			}
 		}
 		if (rt)
 			RTFREE(rt);
 	}
 
 	/*
 	 * Try to cache the route MTU from ip_output so we can consider it for
 	 * the ICMP_UNREACH_NEEDFRAG "Next-Hop MTU" field described in RFC1191.
 	 */
 	bzero(&ro, sizeof(ro));
 
 	error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL);
 
 	if (error == EMSGSIZE && ro.ro_rt)
 		mtu = ro.ro_rt->rt_rmx.rmx_mtu;
 	if (ro.ro_rt)
 		RTFREE(ro.ro_rt);
 
 	/*
 	 * Update statistics.  A successful forward with no pending
 	 * redirect needs no ICMP, so release the saved copy and return.
 	 */
 	if (error)
 		IPSTAT_INC(ips_cantforward);
 	else {
 		IPSTAT_INC(ips_forward);
 		if (type)
 			IPSTAT_INC(ips_redirectsent);
 		else {
 			if (mcopy)
 				m_freem(mcopy);
 			return;
 		}
 	}
 	/* Without the saved copy no ICMP message can be built. */
 	if (mcopy == NULL)
 		return;
 
 	/* Translate the ip_output() error into an ICMP type and code. */
 	switch (error) {
 
 	case 0:				/* forwarded, but need redirect */
 		/* type, code set above */
 		break;
 
 	case ENETUNREACH:
 	case EHOSTUNREACH:
 	case ENETDOWN:
 	case EHOSTDOWN:
 	default:
 		type = ICMP_UNREACH;
 		code = ICMP_UNREACH_HOST;
 		break;
 
 	case EMSGSIZE:
 		type = ICMP_UNREACH;
 		code = ICMP_UNREACH_NEEDFRAG;
 
 		/*
 		 * NOTE(review): `m' (and `ip', which points into it) were
 		 * handed to ip_output() above; confirm they are still
 		 * valid where they are used below.
 		 */
 #ifdef IPSEC
 		/* 
 		 * If IPsec is configured for this path,
 		 * override any possibly mtu value set by ip_output.
 		 */ 
 		mtu = ip_ipsec_mtu(m, mtu);
 #endif /* IPSEC */
 		/*
 		 * If the MTU was set before make sure we are below the
 		 * interface MTU.
 		 * If the MTU wasn't set before use the interface mtu or
 		 * fall back to the next smaller mtu step compared to the
 		 * current packet size.
 		 */
 		if (mtu != 0) {
 			if (ia != NULL)
 				mtu = min(mtu, ia->ia_ifp->if_mtu);
 		} else {
 			if (ia != NULL)
 				mtu = ia->ia_ifp->if_mtu;
 			else
 				mtu = ip_next_mtu(ip->ip_len, 0);
 		}
 		IPSTAT_INC(ips_cantfrag);
 		break;
 
 	case ENOBUFS:
 		/*
 		 * A router should not generate ICMP_SOURCEQUENCH as
 		 * required in RFC1812 Requirements for IP Version 4 Routers.
 		 * Source quench could be a big problem under DoS attacks,
 		 * or if the underlying interface is rate-limited.
 		 * Those who need source quench packets may re-enable them
 		 * via the net.inet.ip.sendsourcequench sysctl.
 		 */
 		if (V_ip_sendsourcequench == 0) {
 			m_freem(mcopy);
 			return;
 		} else {
 			type = ICMP_SOURCEQUENCH;
 			code = 0;
 		}
 		break;
 
 	case EACCES:			/* ipfw denied packet */
 		m_freem(mcopy);
 		return;
 	}
 	/* The ICMP message is built from the copy taken before forwarding. */
 	icmp_error(mcopy, type, code, dest.s_addr, mtu);
 }
 
 /*
  * Build the control messages requested by the socket options and inpcb
  * flags of inp for a received packet.  Each message created by
  * sbcreatecontrol() is stored through mp, and mp is then advanced to
  * that message's m_next so that successive messages form a chain.  A
  * failed allocation leaves mp where it was, so that message is simply
  * omitted.
  *
  * inp - protocol control block whose socket options/flags are examined
  * mp  - in/out cursor into the control mbuf chain being built
  * ip  - IP header of the received packet
  * m   - the received packet (used for its receive interface)
  */
 void
 ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip,
     struct mbuf *m)
 {
 	INIT_VNET_NET(inp->inp_vnet);
 
 	/* Timestamps: one clock reading serves both formats. */
 	if (inp->inp_socket->so_options & (SO_BINTIME | SO_TIMESTAMP)) {
 		struct bintime bt;
 
 		bintime(&bt);
 		if (inp->inp_socket->so_options & SO_BINTIME) {
 			*mp = sbcreatecontrol((caddr_t) &bt, sizeof(bt),
 			SCM_BINTIME, SOL_SOCKET);
 			if (*mp)
 				mp = &(*mp)->m_next;
 		}
 		if (inp->inp_socket->so_options & SO_TIMESTAMP) {
 			struct timeval tv;
 
 			bintime2timeval(&bt, &tv);
 			*mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
 				SCM_TIMESTAMP, SOL_SOCKET);
 			if (*mp)
 				mp = &(*mp)->m_next;
 		}
 	}
 	/* Destination address of the packet. */
 	if (inp->inp_flags & INP_RECVDSTADDR) {
 		*mp = sbcreatecontrol((caddr_t) &ip->ip_dst,
 		    sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 	/* TTL of the packet. */
 	if (inp->inp_flags & INP_RECVTTL) {
 		*mp = sbcreatecontrol((caddr_t) &ip->ip_ttl,
 		    sizeof(u_char), IP_RECVTTL, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 #ifdef notyet
 	/* XXX
 	 * Moving these out of udp_input() made them even more broken
 	 * than they already were.
 	 */
 	/* options were tossed already */
 	if (inp->inp_flags & INP_RECVOPTS) {
 		*mp = sbcreatecontrol((caddr_t) opts_deleted_above,
 		    sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 	/* ip_srcroute doesn't do what we want here, need to fix */
 	if (inp->inp_flags & INP_RECVRETOPTS) {
 		*mp = sbcreatecontrol((caddr_t) ip_srcroute(m),
 		    sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 #endif
 	/* Receive interface, reported as a link-level sockaddr. */
 	if (inp->inp_flags & INP_RECVIF) {
 		struct ifnet *ifp;
 		struct sdlbuf {
 			struct sockaddr_dl sdl;
 			u_char	pad[32];
 		} sdlbuf;
 		struct sockaddr_dl *sdp;
 		struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
 
 		/* Use the interface address only if the index is in range. */
 		if (((ifp = m->m_pkthdr.rcvif)) 
 		&& ( ifp->if_index && (ifp->if_index <= V_if_index))) {
 			sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr;
 			/*
 			 * Change our mind and don't try copy.
 			 */
 			if ((sdp->sdl_family != AF_LINK)
 			|| (sdp->sdl_len > sizeof(sdlbuf))) {
 				goto makedummy;
 			}
 			bcopy(sdp, sdl2, sdp->sdl_len);
 		} else {
 			/*
 			 * Also reached by goto from the branch above:
 			 * emit an empty AF_LINK address with index 0.
 			 */
 makedummy:	
 			sdl2->sdl_len
 				= offsetof(struct sockaddr_dl, sdl_data[0]);
 			sdl2->sdl_family = AF_LINK;
 			sdl2->sdl_index = 0;
 			sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
 		}
 		*mp = sbcreatecontrol((caddr_t) sdl2, sdl2->sdl_len,
 			IP_RECVIF, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 }
 
 /*
  * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the
  * ip_rsvp and ip_rsvp_on variables need to be interlocked with rsvp_on
  * locking.  This code remains in ip_input.c as ip_mroute.c is optionally
  * compiled.
  */
 /*
  * Install so as the RSVP daemon socket.  Fails with EOPNOTSUPP unless
  * so is a raw IPPROTO_RSVP socket, and with EADDRINUSE when a daemon
  * socket is already installed.  Returns 0 on success.
  */
 int
 ip_rsvp_init(struct socket *so)
 {
 	INIT_VNET_INET(so->so_vnet);
 
 	if (so->so_type != SOCK_RAW ||
 	    so->so_proto->pr_protocol != IPPROTO_RSVP)
 		return (EOPNOTSUPP);
 	if (V_ip_rsvpd != NULL)
 		return (EADDRINUSE);
 
 	V_ip_rsvpd = so;
 
 	/*
 	 * Guard the global counter with the per-IP flag so that it can
 	 * never be bumped twice for one registration.
 	 */
 	if (V_ip_rsvp_on == 0) {
 		V_ip_rsvp_on = 1;
 		V_rsvp_on++;
 	}
 	return (0);
 }
 
 /*
  * Remove the RSVP daemon socket and, if RSVP was marked on for IP,
  * clear that mark and drop the global RSVP counter.  Always returns 0.
  */
 int
 ip_rsvp_done(void)
 {
 	INIT_VNET_INET(curvnet);
 
 	V_ip_rsvpd = NULL;
 
 	/*
 	 * Guard the global counter with the per-IP flag so that it can
 	 * never be decremented twice for one registration.
 	 */
 	if (V_ip_rsvp_on != 0) {
 		V_ip_rsvp_on = 0;
 		V_rsvp_on--;
 	}
 	return (0);
 }
 
 /*
  * Input routine for RSVP packets.  When an external handler is hooked
  * in through rsvp_input_p it gets the packet; otherwise the packet is
  * passed to rip_input() if RSVP is on and a daemon socket exists, and
  * freed in all other cases.
  */
 void
 rsvp_input(struct mbuf *m, int off)	/* XXX must fixup manually */
 {
 	INIT_VNET_INET(curvnet);
 
 	/* call the real one if loaded */
 	if (rsvp_input_p != NULL) {
 		rsvp_input_p(m, off);
 		return;
 	}
 
 	/*
 	 * Can still get packets with rsvp_on = 0 if there is a local member
 	 * of the group to which the RSVP packet is addressed.  But in this
 	 * case we want to throw the packet away.
 	 */
 	if (V_rsvp_on && V_ip_rsvpd != NULL) {
 		rip_input(m, off);
 		return;
 	}
 
 	/* Drop the packet */
 	m_freem(m);
 }
Index: projects/pnet/sys/netinet6/ip6_input.c
===================================================================
--- projects/pnet/sys/netinet6/ip6_input.c	(revision 193105)
+++ projects/pnet/sys/netinet6/ip6_input.c	(revision 193106)
@@ -1,1723 +1,1723 @@
 /*-
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$KAME: ip6_input.c,v 1.259 2002/01/21 04:58:09 jinmei Exp $
  */
 
 /*-
  * Copyright (c) 1982, 1986, 1988, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 #include "opt_route.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/proc.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/errno.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
 #include <sys/syslog.h>
 #include <sys/vimage.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/if_dl.h>
 #include <net/route.h>
 #include <net/netisr.h>
 #include <net/pfil.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <net/if_llatbl.h>
 #ifdef INET
 #include <netinet/ip.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/vinet.h>
 #endif /* INET */
 #include <netinet/ip6.h>
 #include <netinet6/in6_var.h>
 #include <netinet6/ip6_var.h>
 #include <netinet/in_pcb.h>
 #include <netinet/icmp6.h>
 #include <netinet6/scope6_var.h>
 #include <netinet6/in6_ifattach.h>
 #include <netinet6/nd6.h>
 #include <netinet6/vinet6.h>
 
 #ifdef IPSEC
 #include <netipsec/ipsec.h>
 #include <netinet6/ip6_ipsec.h>
 #include <netipsec/ipsec6.h>
 #endif /* IPSEC */
 
 #include <netinet6/ip6protosw.h>
 
 extern struct domain inet6domain;
 
 u_char ip6_protox[IPPROTO_MAX];
 
 /*
  * netisr handler description for IPv6 input: names ip6_input() as the
  * handler for NETISR_IPV6, with a default queue limit of IFQ_MAXLEN and
  * the NETISR_POLICY_FLOW dispatch policy.
  */
 static struct netisr_handler ip6_nh = {
 	.nh_name = "ip6",
 	.nh_handler = ip6_input,
 	.nh_proto = NETISR_IPV6,
 	.nh_qlimit = IFQ_MAXLEN,
 	.nh_policy = NETISR_POLICY_FLOW,
 };
 
 #ifndef VIMAGE
 #ifndef VIMAGE_GLOBALS
 struct vnet_inet6 vnet_inet6_0;
 #endif
 #endif
 
 #ifdef VIMAGE_GLOBALS
 static int ip6qmaxlen;
 struct in6_ifaddr *in6_ifaddr;
 struct ip6stat ip6stat;
 
 extern struct callout in6_tmpaddrtimer_ch;
 
 extern int dad_init;
 extern int pmtu_expire;
 extern int pmtu_probe;
 extern u_long rip6_sendspace;
 extern u_long rip6_recvspace;
 extern int icmp6errppslim;
 extern int icmp6_nodeinfo;
 extern int udp6_sendspace;
 extern int udp6_recvspace;
 #endif
 
 struct pfil_head inet6_pfil_hook;
 
 static void ip6_init2(void *);
 static struct ip6aux *ip6_setdstifaddr(struct mbuf *, struct in6_ifaddr *);
 static int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *);
 #ifdef PULLDOWN_TEST
 static struct mbuf *ip6_pullexthdr(struct mbuf *, size_t, int);
 #endif
 
 #ifndef VIMAGE_GLOBALS
 static void vnet_inet6_register(void);
  
 /*
  * Module description handed to vnet_mod_register(): identifies the
  * inet6 module, gives the size of its per-vnet state (struct
  * vnet_inet6) and declares a dependency on the inet module.
  */
 static const vnet_modinfo_t vnet_inet6_modinfo = {
 	.vmi_id		= VNET_MOD_INET6,
 	.vmi_name	= "inet6",
 	.vmi_size	= sizeof(struct vnet_inet6),
 	.vmi_dependson	= VNET_MOD_INET	/* XXX revisit - TCP/UDP needs this? */
 };
  
 /*
  * SYSINIT hook: register the inet6 vnet module description.
  */
 static void
 vnet_inet6_register(void)
 {
 
 	vnet_mod_register(&vnet_inet6_modinfo);
 }
  
 /* Run the registration at SI_SUB_PROTO_BEGIN, SI_ORDER_FIRST. */
 SYSINIT(inet6, SI_SUB_PROTO_BEGIN, SI_ORDER_FIRST, vnet_inet6_register, 0);
 #endif
 
 /*
  * IP6 initialization: fill in IP6 protocol switch table.
  * All protocols not implemented in kernel go to raw IP6 protocol handler.
  *
  * The first part sets the per-vnet defaults and runs for every vnet;
  * everything after the IS_DEFAULT_VNET() check (protocol switch index,
  * pfil hook, netisr registration) runs for the default vnet only.
  */
 void
 ip6_init(void)
 {
 	INIT_VNET_INET6(curvnet);
 	struct ip6protosw *pr;
 	int i;
 
 	V_ip6qmaxlen = IFQ_MAXLEN;
 	V_in6_maxmtu = 0;
 #ifdef IP6_AUTO_LINKLOCAL
 	V_ip6_auto_linklocal = IP6_AUTO_LINKLOCAL;
 #else
 	V_ip6_auto_linklocal = 1;	/* enable by default */
 #endif
 	/* A loader tunable may override the compiled-in default. */
 	TUNABLE_INT_FETCH("net.inet6.ip6.auto_linklocal",
 	    &V_ip6_auto_linklocal);
 
 #ifndef IPV6FORWARDING
 #ifdef GATEWAY6
 #define IPV6FORWARDING	1	/* forward IP6 packets not for us */
 #else
 #define IPV6FORWARDING	0	/* don't forward IP6 packets not for us */
 #endif /* GATEWAY6 */
 #endif /* !IPV6FORWARDING */
 
 #ifndef IPV6_SENDREDIRECTS
 #define IPV6_SENDREDIRECTS	1
 #endif
 
 	V_ip6_forwarding = IPV6FORWARDING; /* act as router? */
 	V_ip6_sendredirects = IPV6_SENDREDIRECTS;
 	V_ip6_defhlim = IPV6_DEFHLIM;
 	V_ip6_defmcasthlim = IPV6_DEFAULT_MULTICAST_HOPS;
 	V_ip6_accept_rtadv = 0;	 /* "IPV6FORWARDING ? 0 : 1" is dangerous */
 	V_ip6_log_interval = 5;
 	V_ip6_hdrnestlimit = 15; /* How many header options will we process? */
 	V_ip6_dad_count = 1;	 /* DupAddrDetectionTransmits */
 	V_ip6_auto_flowlabel = 1;
 	V_ip6_use_deprecated = 1;/* allow deprecated addr (RFC2462 5.5.4) */
 	V_ip6_rr_prune = 5;	 /* router renumbering prefix
                                   * walk list every 5 sec. */
 	V_ip6_mcast_pmtu = 0;	 /* enable pMTU discovery for multicast? */
 	V_ip6_v6only = 1;
 	V_ip6_keepfaith = 0;
 	V_ip6_log_time = (time_t)0L;
 #ifdef IPSTEALTH
 	V_ip6stealth = 0;
 #endif
 	V_nd6_onlink_ns_rfc4861 = 0; /* allow 'on-link' nd6 NS (RFC 4861) */
 
 	V_pmtu_expire = 60*10;
 	V_pmtu_probe = 60*2;
 
 	/* raw IP6 parameters */
 	/*
 	 * Nominal space allocated to a raw ip socket.
 	 */
 #define RIPV6SNDQ	8192
 #define RIPV6RCVQ	8192
 	V_rip6_sendspace = RIPV6SNDQ;
 	V_rip6_recvspace = RIPV6RCVQ;
 
 	/* ICMPV6 parameters */
 	V_icmp6_rediraccept = 1;	/* accept and process redirects */
 	V_icmp6_redirtimeout = 10 * 60;	/* 10 minutes */
 	V_icmp6errppslim = 100;		/* 100pps */
 	/* control how to respond to NI queries */
 	V_icmp6_nodeinfo = (ICMP6_NODEINFO_FQDNOK|ICMP6_NODEINFO_NODEADDROK);
 
 	/* UDP on IP6 parameters */
 	V_udp6_sendspace = 9216;	/* really max datagram size */
 	V_udp6_recvspace = 40 * (1024 + sizeof(struct sockaddr_in6));
 					/* 40 1K datagrams */
 	V_dad_init = 0;
 
 	/* Initialize the IPv6 subsystems this file depends on. */
 	scope6_init();
 	addrsel_policy_init();
 	nd6_init();
 	frag6_init();
 
 	V_ip6_desync_factor = arc4random() % MAX_TEMP_DESYNC_FACTOR;
 
 	/* Skip global initialization stuff for non-default instances. */
 	if (!IS_DEFAULT_VNET(curvnet))
 		return;
 
 #ifdef DIAGNOSTIC
 	if (sizeof(struct protosw) != sizeof(struct ip6protosw))
 		panic("sizeof(protosw) != sizeof(ip6protosw)");
 #endif
 	/* The raw IPv6 entry is the fallback for every protocol number. */
 	pr = (struct ip6protosw *)pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW);
 	if (pr == NULL)
 		panic("ip6_init");
 
 	/* Initialize the entire ip6_protox[] array to IPPROTO_RAW. */
 	for (i = 0; i < IPPROTO_MAX; i++)
 		ip6_protox[i] = pr - inet6sw;
 	/*
 	 * Cycle through IP protocols and put them into the appropriate place
 	 * in ip6_protox[].
 	 */
 	for (pr = (struct ip6protosw *)inet6domain.dom_protosw;
 	    pr < (struct ip6protosw *)inet6domain.dom_protoswNPROTOSW; pr++)
 		if (pr->pr_domain->dom_family == PF_INET6 &&
 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) {
 			/* Be careful to only index valid IP protocols. */
 			if (pr->pr_protocol < IPPROTO_MAX)
 				ip6_protox[pr->pr_protocol] = pr - inet6sw;
 		}
 
 	/* Initialize packet filter hooks. */
 	inet6_pfil_hook.ph_type = PFIL_TYPE_AF;
 	inet6_pfil_hook.ph_af = AF_INET6;
 	if ((i = pfil_head_register(&inet6_pfil_hook)) != 0)
 		printf("%s: WARNING: unable to register pfil hook, "
 			"error %d\n", __func__, i);
 
 	/* Apply the configured queue limit, then register the handler. */
 	ip6_nh.nh_qlimit = V_ip6qmaxlen;
-	netisr2_register(&ip6_nh);
+	netisr_register(&ip6_nh);
 }
 
 /*
  * Second-stage IPv6 initialization for the current vnet: arm the ND6 timer
  * and the timer that regenerates the randomized interface ID used for
  * temporary addresses.  Always returns 0.
  */
 static int
 ip6_init2_vnet(const void *unused __unused)
 {
 	INIT_VNET_INET6(curvnet);
 	int regen_delay;
 
 	/* nd6 timer: first run one second (hz ticks) from now. */
 	callout_init(&V_nd6_timer_ch, 0);
 	callout_reset(&V_nd6_timer_ch, hz, nd6_timer, curvnet);
 
 	/*
 	 * Temporary-address timer: fire once the preferred lifetime, less
 	 * the desync factor and the regeneration advance, has elapsed.
 	 */
 	regen_delay = (V_ip6_temp_preferred_lifetime - V_ip6_desync_factor -
 	    V_ip6_temp_regen_advance) * hz;
 	callout_init(&V_in6_tmpaddrtimer_ch, 0);
 	callout_reset(&V_in6_tmpaddrtimer_ch, regen_delay, in6_tmpaddrtimer,
 	    curvnet);
 
 	return (0);
 }
 
 /*
  * SYSINIT entry point for the second-stage IPv6 initialization; it simply
  * runs ip6_init2_vnet() and discards its (always zero) result.
  */
 static void
 ip6_init2(void *dummy)
 {
 
 	(void)ip6_init2_vnet(NULL);
 }
 
 /*
  * cheat: registered via SYSINIT rather than called from ip6_init().
  * This must be after route_init(), which is now SI_ORDER_THIRD.
  */
 SYSINIT(netinet6init2, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, ip6_init2, NULL);
 
 /*
  * IPv6 input routine.
  *
  * Takes one received packet (m, never NULL per the IPSEC note below),
  * sanity-checks the IPv6 header and addresses, runs the pfil input hooks,
  * decides whether the packet is for this node ("ours") and then either
  * walks the next-header chain through inet6sw[].pr_input, hands the packet
  * to ip6_forward(), or drops it.
  *
  * Exit paths: "bad" frees m and falls through to "out"; "out" releases the
  * route reference held in rin6, if any.  Early returns before the route
  * lookup do not need "out" because rin6 is still zeroed at that point.
  */
 void
 ip6_input(struct mbuf *m)
 {
 	INIT_VNET_NET(curvnet);
 	INIT_VNET_INET6(curvnet);
 	struct ip6_hdr *ip6;
 	int off = sizeof(struct ip6_hdr), nest;
 	u_int32_t plen;
 	u_int32_t rtalert = ~0;	/* ~0 means "no router alert option seen" */
 	int nxt, ours = 0;
 	struct ifnet *deliverifp = NULL, *ifp = NULL;
 	struct in6_addr odst;
 	struct route_in6 rin6;
 	int srcrt = 0;
 	struct llentry *lle = NULL;
 	struct sockaddr_in6 dst6, *dst;
 
 	/* rin6.ro_rt is tested at "out", so it must start out NULL. */
 	bzero(&rin6, sizeof(struct route_in6));
 #ifdef IPSEC
 	/*
 	 * should the inner packet be considered authentic?
 	 * see comment in ah4_input().
 	 * NB: m cannot be NULL when passed to the input routine
 	 */
 
 	m->m_flags &= ~M_AUTHIPHDR;
 	m->m_flags &= ~M_AUTHIPDGM;
 
 #endif /* IPSEC */
 
 	/*
 	 * make sure we don't have onion peering information into m_tag.
 	 */
 	ip6_delaux(m);
 
 	/*
 	 * mbuf statistics
 	 */
 	if (m->m_flags & M_EXT) {
 		if (m->m_next)
 			V_ip6stat.ip6s_mext2m++;
 		else
 			V_ip6stat.ip6s_mext1++;
 	} else {
 #define M2MMAX	(sizeof(V_ip6stat.ip6s_m2m)/sizeof(V_ip6stat.ip6s_m2m[0]))
 		if (m->m_next) {
 			if (m->m_flags & M_LOOP) {
 				V_ip6stat.ip6s_m2m[V_loif[0].if_index]++; /* XXX */
 			} else if (m->m_pkthdr.rcvif->if_index < M2MMAX)
 				V_ip6stat.ip6s_m2m[m->m_pkthdr.rcvif->if_index]++;
 			else
 				V_ip6stat.ip6s_m2m[0]++;
 		} else
 			V_ip6stat.ip6s_m1++;
 #undef M2MMAX
 	}
 
 	/* drop the packet if IPv6 operation is disabled on the IF */
 	if ((ND_IFINFO(m->m_pkthdr.rcvif)->flags & ND6_IFF_IFDISABLED)) {
 		m_freem(m);
 		return;
 	}
 
 	in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_receive);
 	V_ip6stat.ip6s_total++;
 
 #ifndef PULLDOWN_TEST
 	/*
 	 * L2 bridge code and some other code can return mbuf chain
 	 * that does not conform to KAME requirement.  too bad.
 	 * XXX: fails to join if interface MTU > MCLBYTES.  jumbogram?
 	 *
 	 * NOTE(review): m has already been dereferenced above, so the
 	 * "m &&" test below is redundant.
 	 */
 	if (m && m->m_next != NULL && m->m_pkthdr.len < MCLBYTES) {
 		struct mbuf *n;
 
 		/* Copy the whole chain into one freshly allocated mbuf. */
 		MGETHDR(n, M_DONTWAIT, MT_HEADER);
 		if (n)
 			M_MOVE_PKTHDR(n, m);
 		if (n && n->m_pkthdr.len > MHLEN) {
 			MCLGET(n, M_DONTWAIT);
 			if ((n->m_flags & M_EXT) == 0) {
 				m_freem(n);
 				n = NULL;
 			}
 		}
 		if (n == NULL) {
 			m_freem(m);
 			return;	/* ENOBUFS */
 		}
 
 		m_copydata(m, 0, n->m_pkthdr.len, mtod(n, caddr_t));
 		n->m_len = n->m_pkthdr.len;
 		m_freem(m);
 		m = n;
 	}
 	IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), /* nothing */);
 #endif
 
 	if (m->m_len < sizeof(struct ip6_hdr)) {
 		struct ifnet *inifp;
 		/* Save rcvif first: m is NULL afterwards if m_pullup() fails. */
 		inifp = m->m_pkthdr.rcvif;
 		if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
 			V_ip6stat.ip6s_toosmall++;
 			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
 			return;
 		}
 	}
 
 	ip6 = mtod(m, struct ip6_hdr *);
 
 	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
 		V_ip6stat.ip6s_badvers++;
 		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
 		goto bad;
 	}
 
 	V_ip6stat.ip6s_nxthist[ip6->ip6_nxt]++;
 
 	/*
 	 * Check against address spoofing/corruption.
 	 */
 	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src) ||
 	    IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_dst)) {
 		/*
 		 * XXX: "badscope" is not very suitable for a multicast source.
 		 */
 		V_ip6stat.ip6s_badscope++;
 		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
 		goto bad;
 	}
 	if (IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst) &&
 	    !(m->m_flags & M_LOOP)) {
 		/*
 		 * In this case, the packet should come from the loopback
 		 * interface.  However, we cannot just check the if_flags,
 		 * because ip6_mloopback() passes the "actual" interface
 		 * as the outgoing/incoming interface.
 		 */
 		V_ip6stat.ip6s_badscope++;
 		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
 		goto bad;
 	}
 
 #ifdef ALTQ
 	if (altq_input != NULL && (*altq_input)(m, AF_INET6) == 0) {
 		/* packet is dropped by traffic conditioner */
 		return;
 	}
 #endif
 	/*
 	 * The following check is not documented in specs.  A malicious
 	 * party may be able to use IPv4 mapped addr to confuse tcp/udp stack
 	 * and bypass security checks (act as if it was from 127.0.0.1 by using
 	 * IPv6 src ::ffff:127.0.0.1).  Be cautious.
 	 *
 	 * This check chokes if we are in an SIIT cloud.  As none of BSDs
 	 * support IPv4-less kernel compilation, we cannot support SIIT
 	 * environment at all.  So, it makes more sense for us to reject any
 	 * malicious packets for non-SIIT environment, than try to do a
 	 * partial support for SIIT environment.
 	 */
 	if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) ||
 	    IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) {
 		V_ip6stat.ip6s_badscope++;
 		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
 		goto bad;
 	}
 #if 0
 	/*
 	 * Reject packets with IPv4 compatible addresses (auto tunnel).
 	 *
 	 * The code forbids auto tunnel relay case in RFC1933 (the check is
 	 * stronger than RFC1933).  We may want to re-enable it if mech-xx
 	 * is revised to forbid relaying case.
 	 */
 	if (IN6_IS_ADDR_V4COMPAT(&ip6->ip6_src) ||
 	    IN6_IS_ADDR_V4COMPAT(&ip6->ip6_dst)) {
 		V_ip6stat.ip6s_badscope++;
 		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
 		goto bad;
 	}
 #endif
 
 	/*
 	 * Run through list of hooks for input packets.
 	 *
 	 * NB: Beware of the destination address changing
 	 *     (e.g. by NAT rewriting).  When this happens,
 	 *     tell ip6_forward to do the right thing.
 	 */
 	odst = ip6->ip6_dst;
 
 	/* Jump over all PFIL processing if hooks are not active. */
 	if (!PFIL_HOOKED(&inet6_pfil_hook))
 		goto passin;
 
 	if (pfil_run_hooks(&inet6_pfil_hook, &m, m->m_pkthdr.rcvif, PFIL_IN, NULL))
 		return;
 	if (m == NULL)			/* consumed by filter */
 		return;
 	/* The hooks may have replaced m, so reload the header pointer. */
 	ip6 = mtod(m, struct ip6_hdr *);
 	/* srcrt is passed to ip6_forward(): set if a hook rewrote the dst. */
 	srcrt = !IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst);
 
 passin:
 	/*
 	 * Disambiguate address scope zones (if there is ambiguity).
 	 * We first make sure that the original source or destination address
 	 * is not in our internal form for scoped addresses.  Such addresses
 	 * are not necessarily invalid spec-wise, but we cannot accept them due
 	 * to the usage conflict.
 	 * in6_setscope() then also checks and rejects the cases where src or
 	 * dst are the loopback address and the receiving interface
 	 * is not loopback.
 	 */
 	if (in6_clearscope(&ip6->ip6_src) || in6_clearscope(&ip6->ip6_dst)) {
 		V_ip6stat.ip6s_badscope++; /* XXX */
 		goto bad;
 	}
 	if (in6_setscope(&ip6->ip6_src, m->m_pkthdr.rcvif, NULL) ||
 	    in6_setscope(&ip6->ip6_dst, m->m_pkthdr.rcvif, NULL)) {
 		V_ip6stat.ip6s_badscope++;
 		goto bad;
 	}
 
 	/*
 	 * Multicast check. Assume packet is for us to avoid
 	 * prematurely taking locks.
 	 */
 	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
 		ours = 1;
 		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mcast);
 		deliverifp = m->m_pkthdr.rcvif;
 		goto hbhcheck;
 	}
 
 	/*
 	 *  Unicast check
 	 *
 	 *  First look the destination up in the receiving interface's
 	 *  link-layer table; an entry flagged LLE_IFADDR means the address
 	 *  is one of ours.
 	 */
 
 	bzero(&dst6, sizeof(dst6));
 	dst6.sin6_family = AF_INET6;
 	dst6.sin6_len = sizeof(struct sockaddr_in6);
 	dst6.sin6_addr = ip6->ip6_dst;
 	ifp = m->m_pkthdr.rcvif;
 	IF_AFDATA_LOCK(ifp);
 	lle = lla_lookup(LLTABLE6(ifp), 0,
 	     (struct sockaddr *)&dst6);
 	IF_AFDATA_UNLOCK(ifp);
 	if ((lle != NULL) && (lle->la_flags & LLE_IFADDR)) {
 		ours = 1;
 		deliverifp = ifp;
 		LLE_RUNLOCK(lle);
 		goto hbhcheck;
 	}
 	if (lle != NULL)
 		LLE_RUNLOCK(lle);
 
 	/*
 	 * Otherwise consult the routing table.  The route is unlocked right
 	 * away; the pointer kept in rin6.ro_rt is released with RTFREE at
 	 * "out".
 	 */
 	dst = &rin6.ro_dst;
 	dst->sin6_len = sizeof(struct sockaddr_in6);
 	dst->sin6_family = AF_INET6;
 	dst->sin6_addr = ip6->ip6_dst;
 	rin6.ro_rt = rtalloc1((struct sockaddr *)dst, 0, 0);
 	if (rin6.ro_rt)
 		RT_UNLOCK(rin6.ro_rt);
 
 #define rt6_key(r) ((struct sockaddr_in6 *)((r)->rt_nodes->rn_key))
 
 	/*
 	 * Accept the packet if the forwarding interface to the destination
 	 * according to the routing table is the loopback interface,
 	 * unless the associated route has a gateway.
 	 * Note that this approach causes to accept a packet if there is a
 	 * route to the loopback interface for the destination of the packet.
 	 * But we think it's even useful in some situations, e.g. when using
 	 * a special daemon which wants to intercept the packet.
 	 *
 	 * XXX: some OSes automatically make a cloned route for the destination
 	 * of an outgoing packet.  If the outgoing interface of the packet
 	 * is a loopback one, the kernel would consider the packet to be
 	 * accepted, even if we have no such address assigned on the interface.
 	 * We check the cloned flag of the route entry to reject such cases,
 	 * assuming that route entries for our own addresses are not made by
 	 * cloning (it should be true because in6_addloop explicitly installs
 	 * the host route).  However, we might have to do an explicit check
 	 * while it would be less efficient.  Or, should we rather install a
 	 * reject route for such a case?
 	 */
 	if (rin6.ro_rt &&
 	    (rin6.ro_rt->rt_flags &
 	     (RTF_HOST|RTF_GATEWAY)) == RTF_HOST &&
 #ifdef RTF_WASCLONED
 	    !(rin6.ro_rt->rt_flags & RTF_WASCLONED) &&
 #endif
 #ifdef RTF_CLONED
 	    !(rin6.ro_rt->rt_flags & RTF_CLONED) &&
 #endif
 #if 0
 	    /*
 	     * The check below is redundant since the comparison of
 	     * the destination and the key of the rtentry has
 	     * already done through looking up the routing table.
 	     */
 	    IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
 	    &rt6_key(rin6.ro_rt)->sin6_addr)
 #endif
 	    rin6.ro_rt->rt_ifp->if_type == IFT_LOOP) {
 		struct in6_ifaddr *ia6 =
 			(struct in6_ifaddr *)rin6.ro_rt->rt_ifa;
 
 		/*
 		 * record address information into m_tag.
 		 */
 		(void)ip6_setdstifaddr(m, ia6);
 
 		/*
 		 * packets to a tentative, duplicated, or somehow invalid
 		 * address must not be accepted.
 		 */
 		if (!(ia6->ia6_flags & IN6_IFF_NOTREADY)) {
 			/* this address is ready */
 			ours = 1;
 			deliverifp = ia6->ia_ifp;	/* correct? */
 			/* Count the packet in the ip address stats */
 			ia6->ia_ifa.if_ipackets++;
 			ia6->ia_ifa.if_ibytes += m->m_pkthdr.len;
 			goto hbhcheck;
 		} else {
 			char ip6bufs[INET6_ADDRSTRLEN];
 			char ip6bufd[INET6_ADDRSTRLEN];
 			/* address is not ready, so discard the packet. */
 			nd6log((LOG_INFO,
 			    "ip6_input: packet to an unready address %s->%s\n",
 			    ip6_sprintf(ip6bufs, &ip6->ip6_src),
 			    ip6_sprintf(ip6bufd, &ip6->ip6_dst)));
 
 			goto bad;
 		}
 	}
 
 	/*
 	 * FAITH (Firewall Aided Internet Translator)
 	 */
 	if (V_ip6_keepfaith) {
 		if (rin6.ro_rt && rin6.ro_rt->rt_ifp &&
 		    rin6.ro_rt->rt_ifp->if_type == IFT_FAITH) {
 			/* XXX do we need more sanity checks? */
 			ours = 1;
 			deliverifp = rin6.ro_rt->rt_ifp; /* faith */
 			goto hbhcheck;
 		}
 	}
 
 	/*
 	 * Now there is no reason to process the packet if it's not our own
 	 * and we're not a router.
 	 */
 	if (!V_ip6_forwarding) {
 		V_ip6stat.ip6s_cantforward++;
 		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
 		goto bad;
 	}
 
   hbhcheck:
 	/*
 	 * record address information into m_tag, if we don't have one yet.
 	 * note that we are unable to record it, if the address is not listed
 	 * as our interface address (e.g. multicast addresses, addresses
 	 * within FAITH prefixes and such).
 	 */
 	if (deliverifp && !ip6_getdstifaddr(m)) {
 		struct in6_ifaddr *ia6;
 
 		ia6 = in6_ifawithifp(deliverifp, &ip6->ip6_dst);
 		if (ia6) {
 			if (!ip6_setdstifaddr(m, ia6)) {
 				/*
 				 * XXX maybe we should drop the packet here,
 				 * as we could not provide enough information
 				 * to the upper layers.
 				 */
 			}
 		}
 	}
 
 	/*
 	 * Process Hop-by-Hop options header if it's contained.
 	 * m may be modified in ip6_hopopts_input().
 	 * If a JumboPayload option is included, plen will also be modified.
 	 */
 	plen = (u_int32_t)ntohs(ip6->ip6_plen);
 	if (ip6->ip6_nxt == IPPROTO_HOPOPTS) {
 		struct ip6_hbh *hbh;
 
 		if (ip6_hopopts_input(&plen, &rtalert, &m, &off)) {
 #if 0	/*touches NULL pointer*/
 			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
 #endif
 			goto out;	/* m have already been freed */
 		}
 
 		/* adjust pointer */
 		ip6 = mtod(m, struct ip6_hdr *);
 
 		/*
 		 * if the payload length field is 0 and the next header field
 		 * indicates Hop-by-Hop Options header, then a Jumbo Payload
 		 * option MUST be included.
 		 */
 		if (ip6->ip6_plen == 0 && plen == 0) {
 			/*
 			 * Note that if a valid jumbo payload option is
 			 * contained, ip6_hopopts_input() must set a valid
 			 * (non-zero) payload length to the variable plen.
 			 */
 			V_ip6stat.ip6s_badoptions++;
 			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
 			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
 			icmp6_error(m, ICMP6_PARAM_PROB,
 				    ICMP6_PARAMPROB_HEADER,
 				    (caddr_t)&ip6->ip6_plen - (caddr_t)ip6);
 			goto out;
 		}
 #ifndef PULLDOWN_TEST
 		/* ip6_hopopts_input() ensures that mbuf is contiguous */
 		hbh = (struct ip6_hbh *)(ip6 + 1);
 #else
 		IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr),
 			sizeof(struct ip6_hbh));
 		if (hbh == NULL) {
 			V_ip6stat.ip6s_tooshort++;
 			goto out;
 		}
 #endif
 		nxt = hbh->ip6h_nxt;
 
 		/*
 		 * If we are acting as a router and the packet contains a
 		 * router alert option, see if we know the option value.
 		 * Currently, we only support the option value for MLD, in which
 		 * case we should pass the packet to the multicast routing
 		 * daemon.
 		 */
 		if (rtalert != ~0) {
 			switch (rtalert) {
 			case IP6OPT_RTALERT_MLD:
 				if (V_ip6_forwarding)
 					ours = 1;
 				break;
 			default:
 				/*
 				 * RFC2711 requires unrecognized values must be
 				 * silently ignored.
 				 */
 				break;
 			}
 		}
 	} else
 		nxt = ip6->ip6_nxt;
 
 	/*
 	 * Check that the amount of data in the buffers
 	 * is as at least much as the IPv6 header would have us expect.
 	 * Trim mbufs if longer than we expect.
 	 * Drop packet if shorter than we expect.
 	 */
 	if (m->m_pkthdr.len - sizeof(struct ip6_hdr) < plen) {
 		V_ip6stat.ip6s_tooshort++;
 		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated);
 		goto bad;
 	}
 	if (m->m_pkthdr.len > sizeof(struct ip6_hdr) + plen) {
 		if (m->m_len == m->m_pkthdr.len) {
 			/* Single mbuf: just shorten it in place. */
 			m->m_len = sizeof(struct ip6_hdr) + plen;
 			m->m_pkthdr.len = sizeof(struct ip6_hdr) + plen;
 		} else
 			/* Negative count: m_adj() is asked to trim the tail. */
 			m_adj(m, sizeof(struct ip6_hdr) + plen - m->m_pkthdr.len);
 	}
 
 	/*
 	 * Forward if desirable.
 	 */
 	if (V_ip6_mrouter &&
 	    IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
 		/*
 		 * If we are acting as a multicast router, all
 		 * incoming multicast packets are passed to the
 		 * kernel-level multicast forwarding function.
 		 * The packet is returned (relatively) intact; if
 		 * ip6_mforward() returns a non-zero value, the packet
 		 * must be discarded, else it may be accepted below.
 		 *
 		 * XXX TODO: Check hlim and multicast scope here to avoid
 		 * unnecessarily calling into ip6_mforward().
 		 */
 		if (ip6_mforward &&
 		    ip6_mforward(ip6, m->m_pkthdr.rcvif, m)) {
 			/*
 			 * NOTE(review): the rest of this function updates
 			 * V_ip6stat fields directly rather than through
 			 * IP6STAT_INC; confirm both forms are equivalent here.
 			 */
 			IP6STAT_INC(ip6s_cantforward);
 			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
 			goto bad;
 		}
 	} else if (!ours) {
 		ip6_forward(m, srcrt);
 		goto out;
 	}
 
 	ip6 = mtod(m, struct ip6_hdr *);
 
 	/*
 	 * Malicious party may be able to use IPv4 mapped addr to confuse
 	 * tcp/udp stack and bypass security checks (act as if it was from
 	 * 127.0.0.1 by using IPv6 src ::ffff:127.0.0.1).  Be cautious.
 	 *
 	 * For SIIT end node behavior, you may want to disable the check.
 	 * However, you will  become vulnerable to attacks using IPv4 mapped
 	 * source.
 	 */
 	if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) ||
 	    IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) {
 		V_ip6stat.ip6s_badscope++;
 		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
 		goto bad;
 	}
 
 	/*
 	 * Tell launch routine the next header
 	 */
 	V_ip6stat.ip6s_delivered++;
 	in6_ifstat_inc(deliverifp, ifs6_in_deliver);
 	nest = 0;
 
 	/*
 	 * Walk the header chain: each pr_input handler is given &m and &off
 	 * and returns the next protocol number, or IPPROTO_DONE to stop.
 	 */
 	while (nxt != IPPROTO_DONE) {
 		/* A zero V_ip6_hdrnestlimit disables the nesting limit. */
 		if (V_ip6_hdrnestlimit && (++nest > V_ip6_hdrnestlimit)) {
 			V_ip6stat.ip6s_toomanyhdr++;
 			goto bad;
 		}
 
 		/*
 		 * protection against faulty packet - there should be
 		 * more sanity checks in header chain processing.
 		 */
 		if (m->m_pkthdr.len < off) {
 			V_ip6stat.ip6s_tooshort++;
 			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated);
 			goto bad;
 		}
 
 #ifdef IPSEC
 		/*
 		 * enforce IPsec policy checking if we are seeing last header.
 		 * note that we do not visit this with protocols with pcb layer
 		 * code - like udp/tcp/raw ip.
 		 */
 		if (ip6_ipsec_input(m, nxt))
 			goto bad;
 #endif /* IPSEC */
 
 		/*
 		 * Use mbuf flags to propagate Router Alert option to
 		 * ICMPv6 layer, as hop-by-hop options have been stripped.
 		 */
 		if (nxt == IPPROTO_ICMPV6 && rtalert != ~0)
 			m->m_flags |= M_RTALERT_MLD;
 
 		nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt);
 	}
 	goto out;
 bad:
 	/* Drop the packet, then fall through to release the route. */
 	m_freem(m);
 out:
 	/* Release the route obtained from rtalloc1(), if there was one. */
 	if (rin6.ro_rt)
 		RTFREE(rin6.ro_rt);
 }
 
 /*
  * set/grab in6_ifaddr correspond to IPv6 destination address.
  * XXX backward compatibility wrapper
  */
 static struct ip6aux *
 ip6_setdstifaddr(struct mbuf *m, struct in6_ifaddr *ia6)
 {
 	struct ip6aux *ip6a;
 
 	ip6a = ip6_addaux(m);
 	if (ip6a)
 		ip6a->ip6a_dstia6 = ia6;
 	return ip6a;	/* NULL if failed to set */
 }
 
 /*
  * Return the destination in6_ifaddr recorded in the packet's ip6aux record,
  * or NULL when ip6_findaux() finds no record on the mbuf.
  */
 struct in6_ifaddr *
 ip6_getdstifaddr(struct mbuf *m)
 {
 	struct ip6aux *aux = ip6_findaux(m);
 
 	return (aux != NULL ? aux->ip6a_dstia6 : NULL);
 }
 
 /*
  * Hop-by-Hop options header processing. If a valid jumbo payload option is
  * included, the real payload length will be stored in plenp.
  *
  * plenp    - in/out payload length, handed to ip6_process_hopopts().
  * rtalertp - receives the router alert value, if such an option is present
  *            (XXX: should be stored more smart way).
  * mp       - in/out packet pointer; written back on success.
  * offp     - in: offset of the hop-by-hop header; out: offset just past it.
  *
  * Returns 0 on success and -1 on failure.  On failure *mp and *offp are not
  * updated; ip6_input() treats the packet as already disposed of.
  */
 static int
 ip6_hopopts_input(u_int32_t *plenp, u_int32_t *rtalertp,
     struct mbuf **mp, int *offp)
 {
 	INIT_VNET_INET6(curvnet);
 	struct mbuf *m = *mp;
 	int off = *offp, hbhlen;
 	struct ip6_hbh *hbh;
 
 	/* validation of the length of the header */
 #ifndef PULLDOWN_TEST
 	/* First make the fixed part reachable so the length can be read... */
 	IP6_EXTHDR_CHECK(m, off, sizeof(*hbh), -1);
 	hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off);
 	/* ip6h_len counts 8-octet units, not including the first 8 octets. */
 	hbhlen = (hbh->ip6h_len + 1) << 3;
 
 	/* ...then the whole header, and reload the pointer afterwards. */
 	IP6_EXTHDR_CHECK(m, off, hbhlen, -1);
 	hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off);
 #else
 	IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m,
 		sizeof(struct ip6_hdr), sizeof(struct ip6_hbh));
 	if (hbh == NULL) {
 		V_ip6stat.ip6s_tooshort++;
 		return -1;
 	}
 	hbhlen = (hbh->ip6h_len + 1) << 3;
 	IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr),
 		hbhlen);
 	if (hbh == NULL) {
 		V_ip6stat.ip6s_tooshort++;
 		return -1;
 	}
 #endif
 	off += hbhlen;
 	/* From here on hbhlen is the length of the options area only. */
 	hbhlen -= sizeof(struct ip6_hbh);
 
 	if (ip6_process_hopopts(m, (u_int8_t *)hbh + sizeof(struct ip6_hbh),
 				hbhlen, rtalertp, plenp) < 0)
 		return (-1);
 
 	*offp = off;
 	*mp = m;
 	return (0);
 }
 
 /*
  * Search header for all Hop-by-hop options and process each option.
  * This function is separate from ip6_hopopts_input() in order to
  * handle a case where the sending node itself process its hop-by-hop
  * options header. In such a case, the function is called from ip6_output().
  *
  * The function assumes that hbh header is located right after the IPv6 header
  * (RFC2460 p7), opthead is pointer into data content in m, and opthead to
  * opthead + hbhlen is located in continuous memory region.
  *
  * m        - packet containing the options.
  * opthead  - first option byte (just past the ip6_hbh fixed part).
  * hbhlen   - number of option bytes to scan.
  * rtalertp - set to the host-order router alert value when one is found.
  * plenp    - set to the jumbo payload length when a valid option is found;
  *            must be 0 on entry for a jumbo option to be accepted.
  *
  * Returns 0 when every option was processed.  Returns -1 on error; in that
  * case m has been freed here, or handed to icmp6_error() or
  * ip6_unknown_opt(), so the caller must not touch it again.
  */
 int
 ip6_process_hopopts(struct mbuf *m, u_int8_t *opthead, int hbhlen,
     u_int32_t *rtalertp, u_int32_t *plenp)
 {
 	INIT_VNET_INET6(curvnet);
 	struct ip6_hdr *ip6;
 	int optlen = 0;
 	u_int8_t *opt = opthead;
 	u_int16_t rtalert_val;
 	u_int32_t jumboplen;
 	/* Offset of opthead from the start of the packet, for ICMP errors. */
 	const int erroff = sizeof(struct ip6_hdr) + sizeof(struct ip6_hbh);
 
 	for (; hbhlen > 0; hbhlen -= optlen, opt += optlen) {
 		switch (*opt) {
 		case IP6OPT_PAD1:
 			/* Pad1 is a single byte with no length field. */
 			optlen = 1;
 			break;
 		case IP6OPT_PADN:
 			if (hbhlen < IP6OPT_MINLEN) {
 				V_ip6stat.ip6s_toosmall++;
 				goto bad;
 			}
 			/* Data length plus the type and length bytes. */
 			optlen = *(opt + 1) + 2;
 			break;
 		case IP6OPT_ROUTER_ALERT:
 			/* XXX may need check for alignment */
 			if (hbhlen < IP6OPT_RTALERT_LEN) {
 				V_ip6stat.ip6s_toosmall++;
 				goto bad;
 			}
 			if (*(opt + 1) != IP6OPT_RTALERT_LEN - 2) {
 				/* XXX stat */
 				icmp6_error(m, ICMP6_PARAM_PROB,
 				    ICMP6_PARAMPROB_HEADER,
 				    erroff + opt + 1 - opthead);
 				return (-1);
 			}
 			optlen = IP6OPT_RTALERT_LEN;
 			/* The value may be unaligned, so copy it out. */
 			bcopy((caddr_t)(opt + 2), (caddr_t)&rtalert_val, 2);
 			*rtalertp = ntohs(rtalert_val);
 			break;
 		case IP6OPT_JUMBO:
 			/* XXX may need check for alignment */
 			if (hbhlen < IP6OPT_JUMBO_LEN) {
 				V_ip6stat.ip6s_toosmall++;
 				goto bad;
 			}
 			if (*(opt + 1) != IP6OPT_JUMBO_LEN - 2) {
 				/* XXX stat */
 				icmp6_error(m, ICMP6_PARAM_PROB,
 				    ICMP6_PARAMPROB_HEADER,
 				    erroff + opt + 1 - opthead);
 				return (-1);
 			}
 			optlen = IP6OPT_JUMBO_LEN;
 
 			/*
 			 * IPv6 packets that have non 0 payload length
 			 * must not contain a jumbo payload option.
 			 */
 			ip6 = mtod(m, struct ip6_hdr *);
 			if (ip6->ip6_plen) {
 				V_ip6stat.ip6s_badoptions++;
 				icmp6_error(m, ICMP6_PARAM_PROB,
 				    ICMP6_PARAMPROB_HEADER,
 				    erroff + opt - opthead);
 				return (-1);
 			}
 
 			/*
 			 * We may see jumbolen in unaligned location, so
 			 * we'd need to perform bcopy().
 			 * The value comes off the wire, so convert it from
 			 * network to host byte order.
 			 */
 			bcopy(opt + 2, &jumboplen, sizeof(jumboplen));
 			jumboplen = (u_int32_t)ntohl(jumboplen);
 
 #if 1
 			/*
 			 * if there are multiple jumbo payload options,
 			 * *plenp will be non-zero and the packet will be
 			 * rejected.
 			 * the behavior may need some debate in ipngwg -
 			 * multiple options does not make sense, however,
 			 * there's no explicit mention in specification.
 			 */
 			if (*plenp != 0) {
 				V_ip6stat.ip6s_badoptions++;
 				icmp6_error(m, ICMP6_PARAM_PROB,
 				    ICMP6_PARAMPROB_HEADER,
 				    erroff + opt + 2 - opthead);
 				return (-1);
 			}
 #endif
 
 			/*
 			 * jumbo payload length must be larger than 65535.
 			 */
 			if (jumboplen <= IPV6_MAXPACKET) {
 				V_ip6stat.ip6s_badoptions++;
 				icmp6_error(m, ICMP6_PARAM_PROB,
 				    ICMP6_PARAMPROB_HEADER,
 				    erroff + opt + 2 - opthead);
 				return (-1);
 			}
 			*plenp = jumboplen;
 
 			break;
 		default:		/* unknown option */
 			if (hbhlen < IP6OPT_MINLEN) {
 				V_ip6stat.ip6s_toosmall++;
 				goto bad;
 			}
 			/*
 			 * ip6_unknown_opt() returns the option's data length,
 			 * or -1 after disposing of m itself.
 			 */
 			optlen = ip6_unknown_opt(opt, m,
 			    erroff + opt - opthead);
 			if (optlen == -1)
 				return (-1);
 			optlen += 2;
 			break;
 		}
 	}
 
 	return (0);
 
   bad:
 	m_freem(m);
 	return (-1);
 }
 
 /*
  * Unknown option processing.
  * The action is selected by IP6OPT_TYPE() applied to the option type byte.
  * The third argument `off' is the offset from the IPv6 header to the option;
  * it is needed to report the option's position in an ICMPv6 error when the
  * IPv6 header and the option header are not contiguous.
  *
  * Returns the option's data length (the byte after the type) when the
  * option is to be skipped.  Otherwise returns -1, and m has been either
  * freed or handed to icmp6_error().
  */
 int
 ip6_unknown_opt(u_int8_t *optp, struct mbuf *m, int off)
 {
 	INIT_VNET_INET6(curvnet);
 	struct ip6_hdr *ip6;
 	int action = IP6OPT_TYPE(*optp);
 
 	/* ignore the option */
 	if (action == IP6OPT_TYPE_SKIP)
 		return ((int)optp[1]);
 
 	/* silently discard */
 	if (action == IP6OPT_TYPE_DISCARD) {
 		m_freem(m);
 		return (-1);
 	}
 
 	/* send ICMP even if multicasted */
 	if (action == IP6OPT_TYPE_FORCEICMP) {
 		V_ip6stat.ip6s_badoptions++;
 		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_OPTION, off);
 		return (-1);
 	}
 
 	/* send ICMP if not multicasted */
 	if (action == IP6OPT_TYPE_ICMP) {
 		V_ip6stat.ip6s_badoptions++;
 		ip6 = mtod(m, struct ip6_hdr *);
 		if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
 		    (m->m_flags & (M_BCAST|M_MCAST)) == 0)
 			icmp6_error(m, ICMP6_PARAM_PROB,
 			    ICMP6_PARAMPROB_OPTION, off);
 		else
 			m_freem(m);
 		return (-1);
 	}
 
 	m_freem(m);		/* XXX: NOTREACHED */
 	return (-1);
 }
 
 /*
  * Create the "control" list for this pcb.
  * These functions will not modify mbuf chain at all.
  *
  * With KAME mbuf chain restriction:
  * The routine will be called from upper layer handlers like tcp6_input().
  * Thus the routine assumes that the caller (tcp6_input) have already
  * called IP6_EXTHDR_CHECK() and all the extension headers are located in the
  * very first mbuf on the mbuf chain.
  *
  * ip6_savecontrol_v4 will handle those options that are possible to be
  * set on a v4-mapped socket.
  * ip6_savecontrol will directly call ip6_savecontrol_v4 to handle those
  * options and handle the v6-only ones itself.
  */
 struct mbuf **
 ip6_savecontrol_v4(struct inpcb *inp, struct mbuf *m, struct mbuf **mp,
     int *v4only)
 {
 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 
 #ifdef SO_TIMESTAMP
 	if ((inp->inp_socket->so_options & SO_TIMESTAMP) != 0) {
 		struct timeval tv;
 
 		microtime(&tv);
 		*mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
 		    SCM_TIMESTAMP, SOL_SOCKET);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 #endif
 
 	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
 		if (v4only != NULL)
 			*v4only = 1;
 		return (mp);
 	}
 
 #define IS2292(inp, x, y)	(((inp)->inp_flags & IN6P_RFC2292) ? (x) : (y))
 	/* RFC 2292 sec. 5 */
 	if ((inp->inp_flags & IN6P_PKTINFO) != 0) {
 		struct in6_pktinfo pi6;
 
 		bcopy(&ip6->ip6_dst, &pi6.ipi6_addr, sizeof(struct in6_addr));
 		in6_clearscope(&pi6.ipi6_addr);	/* XXX */
 		pi6.ipi6_ifindex =
 		    (m && m->m_pkthdr.rcvif) ? m->m_pkthdr.rcvif->if_index : 0;
 
 		*mp = sbcreatecontrol((caddr_t) &pi6,
 		    sizeof(struct in6_pktinfo),
 		    IS2292(inp, IPV6_2292PKTINFO, IPV6_PKTINFO), IPPROTO_IPV6);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 
 	if ((inp->inp_flags & IN6P_HOPLIMIT) != 0) {
 		int hlim = ip6->ip6_hlim & 0xff;
 
 		*mp = sbcreatecontrol((caddr_t) &hlim, sizeof(int),
 		    IS2292(inp, IPV6_2292HOPLIMIT, IPV6_HOPLIMIT),
 		    IPPROTO_IPV6);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 
 	if (v4only != NULL)
 		*v4only = 0;
 	return (mp);
 }
 
 /*
  * Build the ancillary-data ("control") mbuf chain for a received IPv6
  * packet according to the options enabled in in6p->inp_flags.
  *
  * in6p - the receiving pcb; its inp_flags select what is reported.
  * m    - the received packet; it is only read, never modified.
  * mp   - where the first control mbuf is linked; later ones are chained
  *        through m_next.
  *
  * The options shared with v4-mapped sockets are produced by
  * ip6_savecontrol_v4(); if it reports a non-IPv6 packet nothing else is
  * added.  The rest (traffic class, hop-by-hop options, destination options
  * and routing headers) is handled here.  A failed sbcreatecontrol() only
  * means that item is omitted.
  */
 void
 ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp)
 {
 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 	int v4only = 0;
 
 	mp = ip6_savecontrol_v4(in6p, m, mp, &v4only);
 	if (v4only)
 		return;
 
 	if ((in6p->inp_flags & IN6P_TCLASS) != 0) {
 		u_int32_t flowinfo;
 		int tclass;
 
 		/* The traffic class sits just above the 20-bit flow label. */
 		flowinfo = (u_int32_t)ntohl(ip6->ip6_flow & IPV6_FLOWINFO_MASK);
 		flowinfo >>= 20;
 
 		tclass = flowinfo & 0xff;
 		*mp = sbcreatecontrol((caddr_t) &tclass, sizeof(tclass),
 		    IPV6_TCLASS, IPPROTO_IPV6);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 
 	/*
 	 * IPV6_HOPOPTS socket option.  Recall that we required super-user
 	 * privilege for the option (see ip6_ctloutput), but it might be too
 	 * strict, since there might be some hop-by-hop options which can be
 	 * returned to normal user.
 	 * See also RFC 2292 section 6 (or RFC 3542 section 8).
 	 */
 	if ((in6p->inp_flags & IN6P_HOPOPTS) != 0) {
 		/*
 		 * Check if a hop-by-hop options header is contained in the
 		 * received packet, and if so, store the options as ancillary
 		 * data. Note that a hop-by-hop options header must be
 		 * just after the IPv6 header, which is assured through the
 		 * IPv6 input processing.
 		 */
 		if (ip6->ip6_nxt == IPPROTO_HOPOPTS) {
 			struct ip6_hbh *hbh;
 			int hbhlen = 0;
 #ifdef PULLDOWN_TEST
 			struct mbuf *ext;
 #endif
 
 #ifndef PULLDOWN_TEST
 			hbh = (struct ip6_hbh *)(ip6 + 1);
 			hbhlen = (hbh->ip6h_len + 1) << 3;
 #else
 			ext = ip6_pullexthdr(m, sizeof(struct ip6_hdr),
 			    ip6->ip6_nxt);
 			if (ext == NULL) {
 				V_ip6stat.ip6s_tooshort++;
 				return;
 			}
 			hbh = mtod(ext, struct ip6_hbh *);
 			hbhlen = (hbh->ip6h_len + 1) << 3;
 			if (hbhlen != ext->m_len) {
 				m_freem(ext);
 				V_ip6stat.ip6s_tooshort++;
 				return;
 			}
 #endif
 
 			/*
 			 * XXX: We copy the whole header even if a
 			 * jumbo payload option is included, the option which
 			 * is to be removed before returning according to
 			 * RFC2292.
 			 * Note: this constraint is removed in RFC3542
 			 */
 			*mp = sbcreatecontrol((caddr_t)hbh, hbhlen,
 			    IS2292(in6p, IPV6_2292HOPOPTS, IPV6_HOPOPTS),
 			    IPPROTO_IPV6);
 			if (*mp)
 				mp = &(*mp)->m_next;
 #ifdef PULLDOWN_TEST
 			m_freem(ext);
 #endif
 		}
 	}
 
 	if ((in6p->inp_flags & (IN6P_RTHDR | IN6P_DSTOPTS)) != 0) {
 		int nxt = ip6->ip6_nxt, off = sizeof(struct ip6_hdr);
 
 		/*
 		 * Search for destination options headers or routing
 		 * header(s) through the header chain, and stores each
 		 * header as ancillary data.
 		 * Note that the order of the headers remains in
 		 * the chain of ancillary data.
 		 */
 		while (1) {	/* is explicit loop prevention necessary? */
 			struct ip6_ext *ip6e = NULL;
 			int elen;
 #ifdef PULLDOWN_TEST
 			struct mbuf *ext = NULL;
 #endif
 
 			/*
 			 * if it is not an extension header, don't try to
 			 * pull it from the chain.
 			 */
 			switch (nxt) {
 			case IPPROTO_DSTOPTS:
 			case IPPROTO_ROUTING:
 			case IPPROTO_HOPOPTS:
 			case IPPROTO_AH: /* is it possible? */
 				break;
 			default:
 				goto loopend;
 			}
 
 #ifndef PULLDOWN_TEST
 			/* Stop quietly if the header is not in the first mbuf. */
 			if (off + sizeof(*ip6e) > m->m_len)
 				goto loopend;
 			ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + off);
 			/* AH counts 4-octet units; the others 8-octet units. */
 			if (nxt == IPPROTO_AH)
 				elen = (ip6e->ip6e_len + 2) << 2;
 			else
 				elen = (ip6e->ip6e_len + 1) << 3;
 			if (off + elen > m->m_len)
 				goto loopend;
 #else
 			ext = ip6_pullexthdr(m, off, nxt);
 			if (ext == NULL) {
 				V_ip6stat.ip6s_tooshort++;
 				return;
 			}
 			ip6e = mtod(ext, struct ip6_ext *);
 			if (nxt == IPPROTO_AH)
 				elen = (ip6e->ip6e_len + 2) << 2;
 			else
 				elen = (ip6e->ip6e_len + 1) << 3;
 			if (elen != ext->m_len) {
 				m_freem(ext);
 				V_ip6stat.ip6s_tooshort++;
 				return;
 			}
 #endif
 
 			switch (nxt) {
 			case IPPROTO_DSTOPTS:
 				if (!(in6p->inp_flags & IN6P_DSTOPTS))
 					break;
 
 				*mp = sbcreatecontrol((caddr_t)ip6e, elen,
 				    IS2292(in6p,
 					IPV6_2292DSTOPTS, IPV6_DSTOPTS),
 				    IPPROTO_IPV6);
 				if (*mp)
 					mp = &(*mp)->m_next;
 				break;
 			case IPPROTO_ROUTING:
 				/*
 				 * The parentheses matter: "!flags & RTHDR"
 				 * would negate the whole flag word first and
 				 * so never skip the header, even when only
 				 * IN6P_DSTOPTS was requested.
 				 */
 				if (!(in6p->inp_flags & IN6P_RTHDR))
 					break;
 
 				*mp = sbcreatecontrol((caddr_t)ip6e, elen,
 				    IS2292(in6p, IPV6_2292RTHDR, IPV6_RTHDR),
 				    IPPROTO_IPV6);
 				if (*mp)
 					mp = &(*mp)->m_next;
 				break;
 			case IPPROTO_HOPOPTS:
 			case IPPROTO_AH: /* is it possible? */
 				break;
 
 			default:
 				/*
 				 * other cases have been filtered in the above.
 				 * none will visit this case.  here we supply
 				 * the code just in case (nxt overwritten or
 				 * other cases).
 				 */
 #ifdef PULLDOWN_TEST
 				m_freem(ext);
 #endif
 				goto loopend;
 
 			}
 
 			/* proceed with the next header. */
 			off += elen;
 			nxt = ip6e->ip6e_nxt;
 			ip6e = NULL;
 #ifdef PULLDOWN_TEST
 			m_freem(ext);
 			ext = NULL;
 #endif
 		}
 	  loopend:
 		;
 	}
 }
 #undef IS2292
 
 /*
  * Build an IPV6_PATHMTU control message from *mtu and *dst (with the scope
  * recovered by sa6_recoverscope()), append it to the receive buffer of
  * in6p's socket and wake the socket up.  Returns silently when mtu is NULL,
  * when scope recovery fails, or when the control mbuf cannot be created.
  */
 void
 ip6_notify_pmtu(struct inpcb *in6p, struct sockaddr_in6 *dst, u_int32_t *mtu)
 {
 	struct ip6_mtuinfo info;
 	struct mbuf *ctl;
 	struct socket *sock = in6p->inp_socket;
 
 	if (mtu == NULL)
 		return;
 
 #ifdef DIAGNOSTIC
 	if (sock == NULL)		/* I believe this is impossible */
 		panic("ip6_notify_pmtu: socket is NULL");
 #endif
 
 	/* zero-clear for safety */
 	bzero(&info, sizeof(info));
 	info.ip6m_mtu = *mtu;
 	info.ip6m_addr = *dst;
 	if (sa6_recoverscope(&info.ip6m_addr) != 0)
 		return;
 
 	ctl = sbcreatecontrol((caddr_t)&info, sizeof(info), IPV6_PATHMTU,
 	    IPPROTO_IPV6);
 	if (ctl == NULL)
 		return;
 
 	if (sbappendaddr(&sock->so_rcv, (struct sockaddr *)dst, NULL, ctl)) {
 		sorwakeup(sock);
 		return;
 	}
 
 	/* The append was refused: the control mbuf is still ours to free. */
 	/* XXX: should count statistics */
 	m_freem(ctl);
 }
 
 #ifdef PULLDOWN_TEST
 /*
  * pull single extension header from mbuf chain.  returns single mbuf that
  * contains the result, or NULL on error.
  */
 static struct mbuf *
 ip6_pullexthdr(struct mbuf *m, size_t off, int nxt)
 {
 	struct ip6_ext hdr;
 	struct mbuf *copy;
 	size_t hdrlen;
 
 #ifdef DIAGNOSTIC
 	if (nxt != IPPROTO_DSTOPTS && nxt != IPPROTO_ROUTING &&
 	    nxt != IPPROTO_HOPOPTS && nxt != IPPROTO_AH /* is it possible? */)
 		printf("ip6_pullexthdr: invalid nxt=%d\n", nxt);
 #endif
 
 	/* Peek at the generic extension header to learn its total length. */
 	m_copydata(m, off, sizeof(hdr), (caddr_t)&hdr);
 	hdrlen = (nxt == IPPROTO_AH) ?
 	    (hdr.ip6e_len + 2) << 2 : (hdr.ip6e_len + 1) << 3;
 
 	MGET(copy, M_DONTWAIT, MT_DATA);
 	if (copy == NULL)
 		return NULL;
 
 	/* Headers that do not fit a plain mbuf need a cluster attached. */
 	if (hdrlen >= MLEN) {
 		MCLGET(copy, M_DONTWAIT);
 		if ((copy->m_flags & M_EXT) == 0) {
 			m_free(copy);
 			return NULL;
 		}
 	}
 
 	copy->m_len = 0;
 	if (hdrlen >= M_TRAILINGSPACE(copy)) {
 		m_free(copy);
 		return NULL;
 	}
 
 	m_copydata(m, off, hdrlen, mtod(copy, caddr_t));
 	copy->m_len = hdrlen;
 	return copy;
 }
 #endif
 
 /*
  * Get pointer to the previous header followed by the header
  * currently processed.
  * XXX: This function supposes that
  *	M includes all headers,
  *	the next header field and the header length field of each header
  *	are valid, and
  *	the sum of each header length equals to OFF.
  * Because of these assumptions, this function must be called very
  * carefully. Moreover, it will not be used in the near future when
  * we develop `neater' mechanism to process extension headers.
  */
 char *
 ip6_get_prevhdr(struct mbuf *m, int off)
 {
 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 	struct ip6_ext *ext = NULL;
 	int pos, proto;
 
 	/* No extension headers: the previous header is the IPv6 header. */
 	if (off == sizeof(struct ip6_hdr))
 		return (&ip6->ip6_nxt);
 
 	/*
 	 * Walk the chain from the end of the IPv6 header, remembering the
 	 * last header visited, until the running offset reaches OFF.
 	 */
 	proto = ip6->ip6_nxt;
 	for (pos = sizeof(struct ip6_hdr); pos < off; proto = ext->ip6e_nxt) {
 		ext = (struct ip6_ext *)(mtod(m, caddr_t) + pos);
 
 		if (proto == IPPROTO_FRAGMENT)
 			pos += sizeof(struct ip6_frag);
 		else if (proto == IPPROTO_AH)
 			pos += (ext->ip6e_len + 2) << 2;
 		else
 			pos += (ext->ip6e_len + 1) << 3;
 	}
 
 	if (ext == NULL)
 		return NULL;
 	return (&ext->ip6e_nxt);
 }
 
 /*
  * get next header offset.  m will be retained.
  */
 int
 ip6_nexthdr(struct mbuf *m, int off, int proto, int *nxtp)
 {
 	struct ip6_hdr ip6;
 	struct ip6_ext ip6e;
 	struct ip6_frag fh;
 
 	/* just in case */
 	if (m == NULL)
 		panic("ip6_nexthdr: m == NULL");
 	if (!(m->m_flags & M_PKTHDR) || m->m_pkthdr.len < off)
 		return -1;
 
 	if (proto == IPPROTO_IPV6) {
 		if (m->m_pkthdr.len < off + sizeof(ip6))
 			return -1;
 		m_copydata(m, off, sizeof(ip6), (caddr_t)&ip6);
 		if (nxtp != NULL)
 			*nxtp = ip6.ip6_nxt;
 		off += sizeof(ip6);
 		return off;
 	}
 
 	if (proto == IPPROTO_FRAGMENT) {
 		/*
 		 * terminate parsing if it is not the first fragment,
 		 * it does not make sense to parse through it.
 		 */
 		if (m->m_pkthdr.len < off + sizeof(fh))
 			return -1;
 		m_copydata(m, off, sizeof(fh), (caddr_t)&fh);
 		/* IP6F_OFF_MASK = 0xfff8(BigEndian), 0xf8ff(LittleEndian) */
 		if (fh.ip6f_offlg & IP6F_OFF_MASK)
 			return -1;
 		if (nxtp != NULL)
 			*nxtp = fh.ip6f_nxt;
 		off += sizeof(struct ip6_frag);
 		return off;
 	}
 
 	if (proto == IPPROTO_AH || proto == IPPROTO_HOPOPTS ||
 	    proto == IPPROTO_ROUTING || proto == IPPROTO_DSTOPTS) {
 		if (m->m_pkthdr.len < off + sizeof(ip6e))
 			return -1;
 		m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e);
 		if (nxtp != NULL)
 			*nxtp = ip6e.ip6e_nxt;
 		/* AH counts its length in 4-byte units, the others in 8. */
 		if (proto == IPPROTO_AH)
 			off += (ip6e.ip6e_len + 2) << 2;
 		else
 			off += (ip6e.ip6e_len + 1) << 3;
 		return off;
 	}
 
 	/*
 	 * IPPROTO_NONE, IPPROTO_ESP, IPPROTO_IPCOMP and every other
 	 * protocol: give up.
 	 */
 	return -1;
 }
 
 /*
  * get offset for the last header in the chain.  m will be kept untainted.
  */
 int
 ip6_lasthdr(struct mbuf *m, int off, int proto, int *nxtp)
 {
 	int newoff;
 	int nxt;
 
 	if (!nxtp) {
 		nxt = -1;
 		nxtp = &nxt;
 	}
 	while (1) {
 		newoff = ip6_nexthdr(m, off, proto, nxtp);
 		if (newoff < 0)
 			return off;
 		else if (newoff < off)
 			return -1;	/* invalid */
 		else if (newoff == off)
 			return newoff;
 
 		off = newoff;
 		proto = *nxtp;
 	}
 }
 
 /*
  * Return the ip6aux area stored behind the PACKET_TAG_IPV6_INPUT tag of m,
  * attaching a freshly zeroed one if the packet has none yet.  Returns NULL
  * when a new tag cannot be allocated.
  */
 struct ip6aux *
 ip6_addaux(struct mbuf *m)
 {
 	struct m_tag *tag;
 
 	tag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);
 	if (tag != NULL)
 		return (struct ip6aux *)(tag + 1);
 
 	tag = m_tag_get(PACKET_TAG_IPV6_INPUT, sizeof(struct ip6aux),
 	    M_NOWAIT);
 	if (tag == NULL)
 		return NULL;
 
 	m_tag_prepend(m, tag);
 	bzero(tag + 1, sizeof(struct ip6aux));
 	return (struct ip6aux *)(tag + 1);
 }
 
 /*
  * Look up the ip6aux area stored behind the PACKET_TAG_IPV6_INPUT tag of m.
  * Returns NULL when the packet carries no such tag.
  */
 struct ip6aux *
 ip6_findaux(struct mbuf *m)
 {
 	struct m_tag *tag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);
 
 	if (tag == NULL)
 		return NULL;
 	return (struct ip6aux *)(tag + 1);
 }
 
 /* Remove the PACKET_TAG_IPV6_INPUT tag from m, if one is attached. */
 void
 ip6_delaux(struct mbuf *m)
 {
 	struct m_tag *tag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);
 
 	if (tag == NULL)
 		return;
 	m_tag_delete(m, tag);
 }
 
 /*
  * System control for IP6
  */
 
 /*
  * Table of errno values with PRC_NCMDS slots; slots past the last
  * initializer are zero.  Presumably indexed by PRC_* command code -- confirm
  * against the users of this table.
  */
 u_char	inet6ctlerrmap[PRC_NCMDS] = {
 	0,		0,		0,		0,
 	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
 	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
 	EMSGSIZE,	EHOSTUNREACH,	0,		0,
 	0,		0,		0,		0,
 	ENOPROTOOPT
 };
Index: projects/pnet/sys/netipsec/ipsec_input.c
===================================================================
--- projects/pnet/sys/netipsec/ipsec_input.c	(revision 193105)
+++ projects/pnet/sys/netipsec/ipsec_input.c	(revision 193106)
@@ -1,884 +1,884 @@
 /*	$FreeBSD$	*/
 /*	$OpenBSD: ipsec_input.c,v 1.63 2003/02/20 18:35:43 deraadt Exp $	*/
 /*-
  * The authors of this code are John Ioannidis (ji@tla.org),
  * Angelos D. Keromytis (kermit@csd.uch.gr) and
  * Niels Provos (provos@physnet.uni-hamburg.de).
  *
  * This code was written by John Ioannidis for BSD/OS in Athens, Greece,
  * in November 1995.
  *
  * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996,
  * by Angelos D. Keromytis.
  *
  * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis
  * and Niels Provos.
  *
  * Additional features in 1999 by Angelos D. Keromytis.
  *
  * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis,
  * Angelos D. Keromytis and Niels Provos.
  * Copyright (c) 2001, Angelos D. Keromytis.
  *
  * Permission to use, copy, and modify this software with or without fee
  * is hereby granted, provided that this entire notice is included in
  * all copies of any software which is or includes a copy or
  * modification of this software.
  * You may use this code under the GNU public license if you so wish. Please
  * contribute changes back to the authors under this freer than GPL license
  * so that we may further the use of strong encryption without limitations to
  * all.
  *
  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
  * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
  * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
  * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
  * PURPOSE.
  */
 
 /*
  * IPsec input processing.
  */
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 #include "opt_enc.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/errno.h>
 #include <sys/syslog.h>
 #include <sys/vimage.h>
 
 #include <net/if.h>
 #include <net/pfil.h>
 #include <net/route.h>
 #include <net/netisr.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/in_var.h>
 
 #include <netinet/ip6.h>
 #ifdef INET6
 #include <netinet6/ip6_var.h>
 #endif
 #include <netinet/in_pcb.h>
 #ifdef INET6
 #include <netinet/icmp6.h>
 #include <netinet6/vinet6.h>
 #endif
 
 #include <netipsec/ipsec.h>
 #ifdef INET6
 #include <netipsec/ipsec6.h>
 #endif
 #include <netipsec/ah_var.h>
 #include <netipsec/esp.h>
 #include <netipsec/esp_var.h>
 #include <netipsec/ipcomp_var.h>
 
 #include <netipsec/key.h>
 #include <netipsec/keydb.h>
 
 #include <netipsec/xform.h>
 #include <netinet6/ip6protosw.h>
 
 #include <machine/in_cksum.h>
 #include <machine/stdarg.h>
 
 #ifdef DEV_ENC
 #include <net/if_enc.h>
 #endif
 
 
 /*
  * Increment one of three counters selected by the security protocol p:
  * x when p is IPPROTO_ESP, y when p is IPPROTO_AH, z for anything else.
  */
 #define IPSEC_ISTAT(p,x,y,z) ((p) == IPPROTO_ESP ? (x)++ : \
 			    (p) == IPPROTO_AH ? (y)++ : (z)++)
 
 static void ipsec4_common_ctlinput(int, struct sockaddr *, void *, int);
 
 /*
  * ipsec_common_input gets called when an IPsec-protected packet
  * is received by IPv4 or IPv6.  Its job is to find the right SA
  * and call the appropriate transform.  The transform callback
  * takes care of further processing (like ingress filtering).
  */
 static int
 ipsec_common_input(struct mbuf *m, int skip, int protoff, int af, int sproto)
 {
 	INIT_VNET_IPSEC(curvnet);
 	union sockaddr_union dst_address;
 	struct secasvar *sav;
 	u_int32_t spi;
 	int error;
 
 	/* Count the packet on the per-protocol input counter. */
 	IPSEC_ISTAT(sproto, V_espstat.esps_input, V_ahstat.ahs_input,
 		V_ipcompstat.ipcomps_input);
 
 	IPSEC_ASSERT(m != NULL, ("null packet"));
 
 	IPSEC_ASSERT(sproto == IPPROTO_ESP || sproto == IPPROTO_AH ||
 		sproto == IPPROTO_IPCOMP,
 		("unexpected security protocol %u", sproto));
 
 	/* Drop the packet when the matching V_*_enable flag is clear. */
 	if ((sproto == IPPROTO_ESP && !V_esp_enable) ||
 	    (sproto == IPPROTO_AH && !V_ah_enable) ||
 	    (sproto == IPPROTO_IPCOMP && !V_ipcomp_enable)) {
 		m_freem(m);
 		IPSEC_ISTAT(sproto, V_espstat.esps_pdrops, V_ahstat.ahs_pdrops,
 		    V_ipcompstat.ipcomps_pdrops);
 		return EOPNOTSUPP;
 	}
 
 	/*
 	 * Require two 32-bit words past skip; that covers every SPI/CPI
 	 * read performed below.
 	 */
 	if (m->m_pkthdr.len - skip < 2 * sizeof (u_int32_t)) {
 		m_freem(m);
 		IPSEC_ISTAT(sproto, V_espstat.esps_hdrops, V_ahstat.ahs_hdrops,
 		    V_ipcompstat.ipcomps_hdrops);
 		DPRINTF(("%s: packet too small\n", __func__));
 		return EINVAL;
 	}
 
 	/*
 	 * Retrieve the SPI from the relevant IPsec header.
 	 * NOTE(review): spi is left unassigned if sproto is none of the
 	 * three protocols; only the assertion above guards against that.
 	 */
 	if (sproto == IPPROTO_ESP)
 		m_copydata(m, skip, sizeof(u_int32_t), (caddr_t) &spi);
 	else if (sproto == IPPROTO_AH)
 		m_copydata(m, skip + sizeof(u_int32_t), sizeof(u_int32_t),
 		    (caddr_t) &spi);
 	else if (sproto == IPPROTO_IPCOMP) {
 		/* IPCOMP: a 16-bit CPI at skip + 2 is widened into spi. */
 		u_int16_t cpi;
 		m_copydata(m, skip + sizeof(u_int16_t), sizeof(u_int16_t),
 		    (caddr_t) &cpi);
 		spi = ntohl(htons(cpi));
 	}
 
 	/*
 	 * Find the SA and (indirectly) call the appropriate
 	 * kernel crypto routine. The resulting mbuf chain is a valid
 	 * IP packet ready to go through input processing.
 	 */
 	bzero(&dst_address, sizeof (dst_address));
 	dst_address.sa.sa_family = af;
 	/* Copy the destination address straight out of the IP header. */
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		dst_address.sin.sin_len = sizeof(struct sockaddr_in);
 		m_copydata(m, offsetof(struct ip, ip_dst),
 		    sizeof(struct in_addr),
 		    (caddr_t) &dst_address.sin.sin_addr);
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		dst_address.sin6.sin6_len = sizeof(struct sockaddr_in6);
 		m_copydata(m, offsetof(struct ip6_hdr, ip6_dst),
 		    sizeof(struct in6_addr),
 		    (caddr_t) &dst_address.sin6.sin6_addr);
 		break;
 #endif /* INET6 */
 	default:
 		DPRINTF(("%s: unsupported protocol family %u\n", __func__, af));
 		m_freem(m);
 		IPSEC_ISTAT(sproto, V_espstat.esps_nopf, V_ahstat.ahs_nopf,
 		    V_ipcompstat.ipcomps_nopf);
 		return EPFNOSUPPORT;
 	}
 
 	/* NB: only pass dst since key_allocsa follows RFC2401 */
 	sav = KEY_ALLOCSA(&dst_address, sproto, spi);
 	if (sav == NULL) {
 		DPRINTF(("%s: no key association found for SA %s/%08lx/%u\n",
 			  __func__, ipsec_address(&dst_address),
 			  (u_long) ntohl(spi), sproto));
 		IPSEC_ISTAT(sproto, V_espstat.esps_notdb, V_ahstat.ahs_notdb,
 		    V_ipcompstat.ipcomps_notdb);
 		m_freem(m);
 		return ENOENT;
 	}
 
 	/* An SA without a transform cannot process the packet. */
 	if (sav->tdb_xform == NULL) {
 		DPRINTF(("%s: attempted to use uninitialized SA %s/%08lx/%u\n",
 			 __func__, ipsec_address(&dst_address),
 			 (u_long) ntohl(spi), sproto));
 		IPSEC_ISTAT(sproto, V_espstat.esps_noxform, V_ahstat.ahs_noxform,
 		    V_ipcompstat.ipcomps_noxform);
 		KEY_FREESAV(&sav);
 		m_freem(m);
 		return ENXIO;
 	}
 
 	/*
 	 * Call appropriate transform and return -- callback takes care of
 	 * everything else.
 	 */
 	error = (*sav->tdb_xform->xf_input)(m, sav, skip, protoff);
 	/* Release the SA obtained from KEY_ALLOCSA() above. */
 	KEY_FREESAV(&sav);
 	return error;
 }
 
 #ifdef INET
 /*
  * Common input handler for IPv4 AH, ESP, and IPCOMP.
  */
 int
 ipsec4_common_input(struct mbuf *m, ...)
 {
 	va_list args;
 	int skip, sproto;
 
 	/* The variadic tail is two ints: header offset, then protocol. */
 	va_start(args, m);
 	skip = va_arg(args, int);
 	sproto = va_arg(args, int);
 	va_end(args);
 
 	return ipsec_common_input(m, skip, offsetof(struct ip, ip_p), AF_INET,
 	    sproto);
 }
 
 /* IPv4 AH input: pass the packet to ipsec4_common_input() as IPPROTO_AH. */
 void
 ah4_input(struct mbuf *m, int off)
 {
 	ipsec4_common_input(m, off, IPPROTO_AH);
 }
 /*
  * IPv4 AH control input: forward to ipsec4_common_ctlinput() only when sa
  * is an AF_INET address of exactly sockaddr_in size.
  */
 void
 ah4_ctlinput(int cmd, struct sockaddr *sa, void *v)
 {
 	if (sa->sa_family != AF_INET ||
 	    sa->sa_len != sizeof(struct sockaddr_in))
 		return;
 
 	ipsec4_common_ctlinput(cmd, sa, v, IPPROTO_AH);
 }
 
 /* IPv4 ESP input: pass the packet to ipsec4_common_input() as IPPROTO_ESP. */
 void
 esp4_input(struct mbuf *m, int off)
 {
 	ipsec4_common_input(m, off, IPPROTO_ESP);
 }
 /*
  * IPv4 ESP control input: forward to ipsec4_common_ctlinput() only when sa
  * is an AF_INET address of exactly sockaddr_in size.
  */
 void
 esp4_ctlinput(int cmd, struct sockaddr *sa, void *v)
 {
 	if (sa->sa_family != AF_INET ||
 	    sa->sa_len != sizeof(struct sockaddr_in))
 		return;
 
 	ipsec4_common_ctlinput(cmd, sa, v, IPPROTO_ESP);
 }
 
 /*
  * IPv4 IPCOMP input: pass the packet to ipsec4_common_input() as
  * IPPROTO_IPCOMP.
  */
 void
 ipcomp4_input(struct mbuf *m, int off)
 {
 	ipsec4_common_input(m, off, IPPROTO_IPCOMP);
 }
 
 /*
  * IPsec input callback for INET protocols.
  * This routine is called as the transform callback.
  * Takes care of filtering and other sanity checks on
  * the processed packet.
  */
 int
 ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav,
 			int skip, int protoff, struct m_tag *mt)
 {
 	INIT_VNET_IPSEC(curvnet);
 	int prot, af, sproto;
 	struct ip *ip;
 	struct m_tag *mtag;
 	struct tdb_ident *tdbi;
 	struct secasindex *saidx;
 	int error;
 #ifdef INET6
 #ifdef notyet
 	char ip6buf[INET6_ADDRSTRLEN];
 #endif
 #endif
 
 	IPSEC_ASSERT(m != NULL, ("null mbuf"));
 	IPSEC_ASSERT(sav != NULL, ("null SA"));
 	IPSEC_ASSERT(sav->sah != NULL, ("null SAH"));
 	saidx = &sav->sah->saidx;
 	af = saidx->dst.sa.sa_family;
 	IPSEC_ASSERT(af == AF_INET, ("unexpected af %u", af));
 	sproto = saidx->proto;
 	IPSEC_ASSERT(sproto == IPPROTO_ESP || sproto == IPPROTO_AH ||
 		sproto == IPPROTO_IPCOMP,
 		("unexpected security protocol %u", sproto));
 
 	/*
 	 * Sanity check.
 	 * NOTE(review): this is the only exit of this function that calls
 	 * KEY_FREESAV(); the 'bad' path and the other returns do not.
 	 */
 	if (m == NULL) {
 		DPRINTF(("%s: null mbuf", __func__));
 		IPSEC_ISTAT(sproto, V_espstat.esps_badkcr, V_ahstat.ahs_badkcr,
 		    V_ipcompstat.ipcomps_badkcr);
 		KEY_FREESAV(&sav);
 		return EINVAL;
 	}
 
 	if (skip != 0) {
 		/* Fix IPv4 header */
 		if (m->m_len < skip && (m = m_pullup(m, skip)) == NULL) {
 			DPRINTF(("%s: processing failed for SA %s/%08lx\n",
 			    __func__, ipsec_address(&sav->sah->saidx.dst),
 			    (u_long) ntohl(sav->spi)));
 			IPSEC_ISTAT(sproto, V_espstat.esps_hdrops, V_ahstat.ahs_hdrops,
 			    V_ipcompstat.ipcomps_hdrops);
 			error = ENOBUFS;
 			goto bad;
 		}
 
 		/*
 		 * Rewrite ip_len from the packet length, pass ip_off through
 		 * htons(), and recompute the header checksum over
 		 * ip_hl << 2 bytes.
 		 */
 		ip = mtod(m, struct ip *);
 		ip->ip_len = htons(m->m_pkthdr.len);
 		ip->ip_off = htons(ip->ip_off);
 		ip->ip_sum = 0;
 		ip->ip_sum = in_cksum(m, ip->ip_hl << 2);
 	} else {
 		ip = mtod(m, struct ip *);
 	}
 	prot = ip->ip_p;
 
 #ifdef notyet
 	/* IP-in-IP encapsulation */
 	if (prot == IPPROTO_IPIP) {
 		struct ip ipn;
 
 		if (m->m_pkthdr.len - skip < sizeof(struct ip)) {
 			IPSEC_ISTAT(sproto, V_espstat.esps_hdrops,
 			    V_ahstat.ahs_hdrops,
 			    V_ipcompstat.ipcomps_hdrops);
 			error = EINVAL;
 			goto bad;
 		}
 		/* ipn will now contain the inner IPv4 header */
 		m_copydata(m, ip->ip_hl << 2, sizeof(struct ip),
 		    (caddr_t) &ipn);
 
 		/* XXX PROXY address isn't recorded in SAH */
 		/*
 		 * Check that the inner source address is the same as
 		 * the proxy address, if available.
 		 */
 		if ((saidx->proxy.sa.sa_family == AF_INET &&
 		    saidx->proxy.sin.sin_addr.s_addr !=
 		    INADDR_ANY &&
 		    ipn.ip_src.s_addr !=
 		    saidx->proxy.sin.sin_addr.s_addr) ||
 		    (saidx->proxy.sa.sa_family != AF_INET &&
 			saidx->proxy.sa.sa_family != 0)) {
 
 			DPRINTF(("%s: inner source address %s doesn't "
 			    "correspond to expected proxy source %s, "
 			    "SA %s/%08lx\n", __func__,
 			    inet_ntoa4(ipn.ip_src),
 			    ipsp_address(saidx->proxy),
 			    ipsp_address(saidx->dst),
 			    (u_long) ntohl(sav->spi)));
 
 			IPSEC_ISTAT(sproto, V_espstat.esps_pdrops,
 			    V_ahstat.ahs_pdrops,
 			    V_ipcompstat.ipcomps_pdrops);
 			error = EACCES;
 			goto bad;
 		}
 	}
 #ifdef INET6
 	/* IPv6-in-IP encapsulation. */
 	if (prot == IPPROTO_IPV6) {
 		struct ip6_hdr ip6n;
 
 		if (m->m_pkthdr.len - skip < sizeof(struct ip6_hdr)) {
 			IPSEC_ISTAT(sproto, V_espstat.esps_hdrops,
 			    V_ahstat.ahs_hdrops,
 			    V_ipcompstat.ipcomps_hdrops);
 			error = EINVAL;
 			goto bad;
 		}
 		/* ip6n will now contain the inner IPv6 header. */
 		m_copydata(m, ip->ip_hl << 2, sizeof(struct ip6_hdr),
 		    (caddr_t) &ip6n);
 
 		/*
 		 * Check that the inner source address is the same as
 		 * the proxy address, if available.
 		 */
 		if ((saidx->proxy.sa.sa_family == AF_INET6 &&
 		    !IN6_IS_ADDR_UNSPECIFIED(&saidx->proxy.sin6.sin6_addr) &&
 		    !IN6_ARE_ADDR_EQUAL(&ip6n.ip6_src,
 			&saidx->proxy.sin6.sin6_addr)) ||
 		    (saidx->proxy.sa.sa_family != AF_INET6 &&
 			saidx->proxy.sa.sa_family != 0)) {
 
 			DPRINTF(("%s: inner source address %s doesn't "
 			    "correspond to expected proxy source %s, "
 			    "SA %s/%08lx\n", __func__,
 			    ip6_sprintf(ip6buf, &ip6n.ip6_src),
 			    ipsec_address(&saidx->proxy),
 			    ipsec_address(&saidx->dst),
 			    (u_long) ntohl(sav->spi)));
 
 			IPSEC_ISTAT(sproto, V_espstat.esps_pdrops,
 			    V_ahstat.ahs_pdrops,
 			    V_ipcompstat.ipcomps_pdrops);
 			error = EACCES;
 			goto bad;
 		}
 	}
 #endif /* INET6 */
 #endif /*XXX*/
 
 	/*
 	 * Record what we've done to the packet (under what SA it was
 	 * processed). If we've been passed an mtag, it means the packet
 	 * was already processed by an ethernet/crypto combo card and
 	 * thus has a tag attached with all the right information, but
 	 * with a PACKET_TAG_IPSEC_IN_CRYPTO_DONE as opposed to
 	 * PACKET_TAG_IPSEC_IN_DONE type; in that case, just change the type.
 	 */
 	if (mt == NULL && sproto != IPPROTO_IPCOMP) {
 		mtag = m_tag_get(PACKET_TAG_IPSEC_IN_DONE,
 		    sizeof(struct tdb_ident), M_NOWAIT);
 		if (mtag == NULL) {
 			DPRINTF(("%s: failed to get tag\n", __func__));
 			IPSEC_ISTAT(sproto, V_espstat.esps_hdrops,
 			    V_ahstat.ahs_hdrops, V_ipcompstat.ipcomps_hdrops);
 			error = ENOMEM;
 			goto bad;
 		}
 
 		/* The tdb_ident payload lives directly behind the tag. */
 		tdbi = (struct tdb_ident *)(mtag + 1);
 		bcopy(&saidx->dst, &tdbi->dst, saidx->dst.sa.sa_len);
 		tdbi->proto = sproto;
 		tdbi->spi = sav->spi;
 		/* Cache those two for enc(4) in xform_ipip. */
 		tdbi->alg_auth = sav->alg_auth;
 		tdbi->alg_enc = sav->alg_enc;
 
 		m_tag_prepend(m, mtag);
 	} else if (mt != NULL) {
 		mt->m_tag_id = PACKET_TAG_IPSEC_IN_DONE;
 		/* XXX do we need to mark m_flags??? */
 	}
 
 	key_sa_recordxfer(sav, m);		/* record data transfer */
 
 #ifdef DEV_ENC
 	encif->if_ipackets++;
 	encif->if_ibytes += m->m_pkthdr.len;
 
 	/*
 	 * Pass the mbuf to enc0 for bpf and pfil. We will filter the IPIP
 	 * packet later after it has been decapsulated.
 	 */
 	ipsec_bpf(m, sav, AF_INET, ENC_IN|ENC_BEFORE);
 
 	/*
 	 * NOTE(review): a filter failure returns without freeing m here;
 	 * presumably ipsec_filter() has already disposed of it -- confirm.
 	 */
 	if (prot != IPPROTO_IPIP)
 		if ((error = ipsec_filter(&m, PFIL_IN, ENC_IN|ENC_BEFORE)) != 0)
 			return (error);
 #endif
 
 	/*
 	 * Re-dispatch via software interrupt.
 	 */
-	if ((error = netisr2_queue_src(NETISR_IP, (uintptr_t)sav, m))) {
+	if ((error = netisr_queue_src(NETISR_IP, (uintptr_t)sav, m))) {
 		/*
 		 * NOTE(review): m is not freed on this path; presumably the
 		 * queueing routine consumed it on failure -- confirm.
 		 */
 		IPSEC_ISTAT(sproto, V_espstat.esps_qfull, V_ahstat.ahs_qfull,
 			    V_ipcompstat.ipcomps_qfull);
 
 		DPRINTF(("%s: queue full; proto %u packet dropped\n",
 			__func__, sproto));
 		return error;
 	}
 	return 0;
 	/* Common failure exit: discard the packet and report error. */
 bad:
 	m_freem(m);
 	return error;
 }
 
 /*
  * Common IPv4 control-input hook for the IPsec protocols.  Currently an
  * empty placeholder: all four arguments are ignored.  (The prototype
  * earlier in this file declares it static.)
  */
 void
 ipsec4_common_ctlinput(int cmd, struct sockaddr *sa, void *v, int proto)
 {
 	/* XXX nothing just yet */
 }
 #endif /* INET */
 
 #ifdef INET6
 /*
  * IPv6 input wrapper for the IPsec protocols.  proto is forwarded unchanged
  * to ipsec_common_input(), so this is not specific to AH.
  *
  * Computes protoff, the offset of the next-header byte that refers to the
  * header located at *offp, then hands *mp to ipsec_common_input().  Always
  * returns IPPROTO_DONE; the result of ipsec_common_input() is discarded.
  */
 int
 ipsec6_common_input(struct mbuf **mp, int *offp, int proto)
 {
 	INIT_VNET_IPSEC(curvnet);
 	int l = 0;
 	int protoff;
 	struct ip6_ext ip6e;
 
 	if (*offp < sizeof(struct ip6_hdr)) {
 		/*
 		 * NOTE(review): *mp is not freed on this path, unlike the
 		 * malformed-chain path below -- confirm who owns the mbuf.
 		 */
 		DPRINTF(("%s: bad offset %u\n", __func__, *offp));
 		return IPPROTO_DONE;
 	} else if (*offp == sizeof(struct ip6_hdr)) {
 		/* No extension headers: use the IPv6 header's own field. */
 		protoff = offsetof(struct ip6_hdr, ip6_nxt);
 	} else {
 		/* Chase down the header chain... */
 		protoff = sizeof(struct ip6_hdr);
 
 		do {
 			protoff += l;
 			m_copydata(*mp, protoff, sizeof(ip6e),
 			    (caddr_t) &ip6e);
 
 			/*
 			 * NOTE(review): the AH length formula is selected by
 			 * ip6e_nxt, i.e. the type of the header that follows
 			 * the one being measured -- looks suspicious, confirm.
 			 */
 			if (ip6e.ip6e_nxt == IPPROTO_AH)
 				l = (ip6e.ip6e_len + 2) << 2;
 			else
 				l = (ip6e.ip6e_len + 1) << 3;
 			IPSEC_ASSERT(l > 0, ("l went zero or negative"));
 		} while (protoff + l < *offp);
 
 		/* Malformed packet check */
 		if (protoff + l != *offp) {
 			DPRINTF(("%s: bad packet header chain, protoff %u, "
 				"l %u, off %u\n", __func__, protoff, l, *offp));
 			IPSEC_ISTAT(proto, V_espstat.esps_hdrops,
 				    V_ahstat.ahs_hdrops,
 				    V_ipcompstat.ipcomps_hdrops);
 			m_freem(*mp);
 			*mp = NULL;
 			return IPPROTO_DONE;
 		}
 		/* Point at the next-header byte of the last header walked. */
 		protoff += offsetof(struct ip6_ext, ip6e_nxt);
 	}
 	(void) ipsec_common_input(*mp, *offp, protoff, AF_INET6, proto);
 	return IPPROTO_DONE;
 }
 
 /*
  * IPsec input callback, called by the transform callback. Takes care of
  * filtering and other sanity checks on the processed packet.
  */
 int
 ipsec6_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip, int protoff,
     struct m_tag *mt)
 {
 	INIT_VNET_INET6(curvnet);
 	INIT_VNET_IPSEC(curvnet);
 	int prot, af, sproto;
 	struct ip6_hdr *ip6;
 	struct m_tag *mtag;
 	struct tdb_ident *tdbi;
 	struct secasindex *saidx;
 	int nxt;
 	u_int8_t nxt8;
 	int error, nest;
 #ifdef notyet
 	char ip6buf[INET6_ADDRSTRLEN];
 #endif
 
 	IPSEC_ASSERT(m != NULL, ("null mbuf"));
 	IPSEC_ASSERT(sav != NULL, ("null SA"));
 	IPSEC_ASSERT(sav->sah != NULL, ("null SAH"));
 	saidx = &sav->sah->saidx;
 	af = saidx->dst.sa.sa_family;
 	IPSEC_ASSERT(af == AF_INET6, ("unexpected af %u", af));
 	sproto = saidx->proto;
 	IPSEC_ASSERT(sproto == IPPROTO_ESP || sproto == IPPROTO_AH ||
 		sproto == IPPROTO_IPCOMP,
 		("unexpected security protocol %u", sproto));
 
 	/* Sanity check */
 	if (m == NULL) {
 		DPRINTF(("%s: null mbuf", __func__));
 		IPSEC_ISTAT(sproto, V_espstat.esps_badkcr, V_ahstat.ahs_badkcr,
 		    V_ipcompstat.ipcomps_badkcr);
 		error = EINVAL;
 		goto bad;
 	}
 
 	/* Fix IPv6 header */
 	if (m->m_len < sizeof(struct ip6_hdr) &&
 	    (m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
 
 		DPRINTF(("%s: processing failed for SA %s/%08lx\n",
 		    __func__, ipsec_address(&sav->sah->saidx.dst),
 		    (u_long) ntohl(sav->spi)));
 
 		IPSEC_ISTAT(sproto, V_espstat.esps_hdrops, V_ahstat.ahs_hdrops,
 		    V_ipcompstat.ipcomps_hdrops);
 		error = EACCES;
 		goto bad;
 	}
 
 	ip6 = mtod(m, struct ip6_hdr *);
 	ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr));
 
 	/* Save protocol */
 	m_copydata(m, protoff, 1, (unsigned char *) &prot);
 
 #ifdef notyet
 #ifdef INET
 	/* IP-in-IP encapsulation */
 	if (prot == IPPROTO_IPIP) {
 		struct ip ipn;
 
 		if (m->m_pkthdr.len - skip < sizeof(struct ip)) {
 			IPSEC_ISTAT(sproto, V_espstat.esps_hdrops,
 			    V_ahstat.ahs_hdrops,
 			    V_ipcompstat.ipcomps_hdrops);
 			error = EINVAL;
 			goto bad;
 		}
 		/* ipn will now contain the inner IPv4 header */
 		m_copydata(m, skip, sizeof(struct ip), (caddr_t) &ipn);
 
 		/*
 		 * Check that the inner source address is the same as
 		 * the proxy address, if available.
 		 */
 		if ((saidx->proxy.sa.sa_family == AF_INET &&
 		    saidx->proxy.sin.sin_addr.s_addr != INADDR_ANY &&
 		    ipn.ip_src.s_addr != saidx->proxy.sin.sin_addr.s_addr) ||
 		    (saidx->proxy.sa.sa_family != AF_INET &&
 			saidx->proxy.sa.sa_family != 0)) {
 
 			DPRINTF(("%s: inner source address %s doesn't "
 			    "correspond to expected proxy source %s, "
 			    "SA %s/%08lx\n", __func__,
 			    inet_ntoa4(ipn.ip_src),
 			    ipsec_address(&saidx->proxy),
 			    ipsec_address(&saidx->dst),
 			    (u_long) ntohl(sav->spi)));
 
 			IPSEC_ISTATsproto, (V_espstat.esps_pdrops,
 			    V_ahstat.ahs_pdrops, V_ipcompstat.ipcomps_pdrops);
 			error = EACCES;
 			goto bad;
 		}
 	}
 #endif /* INET */
 
 	/* IPv6-in-IP encapsulation */
 	if (prot == IPPROTO_IPV6) {
 		struct ip6_hdr ip6n;
 
 		if (m->m_pkthdr.len - skip < sizeof(struct ip6_hdr)) {
 			IPSEC_ISTAT(sproto, V_espstat.esps_hdrops,
 			    V_ahstat.ahs_hdrops,
 			    V_ipcompstat.ipcomps_hdrops);
 			error = EINVAL;
 			goto bad;
 		}
 		/* ip6n will now contain the inner IPv6 header. */
 		m_copydata(m, skip, sizeof(struct ip6_hdr),
 		    (caddr_t) &ip6n);
 
 		/*
 		 * Check that the inner source address is the same as
 		 * the proxy address, if available.
 		 */
 		if ((saidx->proxy.sa.sa_family == AF_INET6 &&
 		    !IN6_IS_ADDR_UNSPECIFIED(&saidx->proxy.sin6.sin6_addr) &&
 		    !IN6_ARE_ADDR_EQUAL(&ip6n.ip6_src,
 			&saidx->proxy.sin6.sin6_addr)) ||
 		    (saidx->proxy.sa.sa_family != AF_INET6 &&
 			saidx->proxy.sa.sa_family != 0)) {
 
 			DPRINTF(("%s: inner source address %s doesn't "
 			    "correspond to expected proxy source %s, "
 			    "SA %s/%08lx\n", __func__,
 			    ip6_sprintf(ip6buf, &ip6n.ip6_src),
 			    ipsec_address(&saidx->proxy),
 			    ipsec_address(&saidx->dst),
 			    (u_long) ntohl(sav->spi)));
 
 			IPSEC_ISTAT(sproto, V_espstat.esps_pdrops,
 			    V_ahstat.ahs_pdrops, V_ipcompstat.ipcomps_pdrops);
 			error = EACCES;
 			goto bad;
 		}
 	}
 #endif /*XXX*/
 
 	/*
 	 * Record what we've done to the packet (under what SA it was
 	 * processed). If we've been passed an mtag, it means the packet
 	 * was already processed by an ethernet/crypto combo card and
 	 * thus has a tag attached with all the right information, but
 	 * with a PACKET_TAG_IPSEC_IN_CRYPTO_DONE as opposed to
 	 * PACKET_TAG_IPSEC_IN_DONE type; in that case, just change the type.
 	 */
 	if (mt == NULL && sproto != IPPROTO_IPCOMP) {
 		mtag = m_tag_get(PACKET_TAG_IPSEC_IN_DONE,
 		    sizeof(struct tdb_ident), M_NOWAIT);
 		if (mtag == NULL) {
 			DPRINTF(("%s: failed to get tag\n", __func__));
 			IPSEC_ISTAT(sproto, V_espstat.esps_hdrops,
 			    V_ahstat.ahs_hdrops, V_ipcompstat.ipcomps_hdrops);
 			error = ENOMEM;
 			goto bad;
 		}
 
 		tdbi = (struct tdb_ident *)(mtag + 1);
 		bcopy(&saidx->dst, &tdbi->dst, sizeof(union sockaddr_union));
 		tdbi->proto = sproto;
 		tdbi->spi = sav->spi;
 		/* Cache those two for enc(4) in xform_ipip. */
 		tdbi->alg_auth = sav->alg_auth;
 		tdbi->alg_enc = sav->alg_enc;
 
 		m_tag_prepend(m, mtag);
 	} else {
 		if (mt != NULL)
 			mt->m_tag_id = PACKET_TAG_IPSEC_IN_DONE;
 		/* XXX do we need to mark m_flags??? */
 	}
 
 	key_sa_recordxfer(sav, m);
 
 #ifdef DEV_ENC
 	encif->if_ipackets++;
 	encif->if_ibytes += m->m_pkthdr.len;
 
 	/*
 	 * Pass the mbuf to enc0 for bpf and pfil. We will filter the IPIP
 	 * packet later after it has been decapsulated.
 	 */
 	ipsec_bpf(m, sav, AF_INET6, ENC_IN|ENC_BEFORE);
 
 	/* XXX-BZ does not make sense. */
 	if (prot != IPPROTO_IPIP)
 		if ((error = ipsec_filter(&m, PFIL_IN, ENC_IN|ENC_BEFORE)) != 0)
 			return (error);
 #endif
 
 	/* Retrieve new protocol */
 	m_copydata(m, protoff, sizeof(u_int8_t), (caddr_t) &nxt8);
 
 	/*
 	 * See the end of ip6_input for this logic.
 	 * IPPROTO_IPV[46] case will be processed just like other ones
 	 */
 	nest = 0;
 	nxt = nxt8;
 	while (nxt != IPPROTO_DONE) {
 		if (V_ip6_hdrnestlimit && (++nest > V_ip6_hdrnestlimit)) {
 			V_ip6stat.ip6s_toomanyhdr++;
 			error = EINVAL;
 			goto bad;
 		}
 
 		/*
 		 * Protection against faulty packet - there should be
 		 * more sanity checks in header chain processing.
 		 */
 		if (m->m_pkthdr.len < skip) {
 			V_ip6stat.ip6s_tooshort++;
 			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated);
 			error = EINVAL;
 			goto bad;
 		}
 		/*
 		 * Enforce IPsec policy checking if we are seeing last header.
 		 * note that we do not visit this with protocols with pcb layer
 		 * code - like udp/tcp/raw ip.
 		 */
 		if ((inet6sw[ip6_protox[nxt]].pr_flags & PR_LASTHDR) != 0 &&
 		    ipsec6_in_reject(m, NULL)) {
 			error = EINVAL;
 			goto bad;
 		}
 		nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &skip, nxt);
 	}
 	return 0;
 bad:
 	if (m)
 		m_freem(m);
 	return error;
 }
 
 /*
  * Control-input handler for ESP over IPv6.
  *
  * Requests are ignored unless sa is an AF_INET6 address of sockaddr_in6
  * size and cmd is below PRC_NCMDS.  When d carries an IPv6 header
  * (ip6c_ip6 non-NULL) the notification is passed on through pfctlinput2()
  * and, for PRC_MSGSIZE, icmp6_mtudisc_update() is called with a flag saying
  * whether an ESP SA matching the SPI in the packet was found.
  */
 void
 esp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
 {
 	struct ip6ctlparam *ip6cp = NULL;
 	struct mbuf *m = NULL;
 	struct ip6_hdr *ip6;
 	int off;
 
 	if (sa->sa_family != AF_INET6 ||
 	    sa->sa_len != sizeof(struct sockaddr_in6))
 		return;
 	if ((unsigned)cmd >= PRC_NCMDS)
 		return;
 
 	/* if the parameter is from icmp6, decode it. */
 	if (d != NULL) {
 		ip6cp = (struct ip6ctlparam *)d;
 		m = ip6cp->ip6c_m;
 		ip6 = ip6cp->ip6c_ip6;
 		off = ip6cp->ip6c_off;
 	} else {
 		m = NULL;
 		ip6 = NULL;
 		off = 0;	/* calm gcc */
 	}
 
 	/* ip6 can only be non-NULL when d (and hence ip6cp) is non-NULL. */
 	if (ip6 != NULL) {
 
 		struct ip6ctlparam ip6cp1;
 
 		/*
 		 * Notify the error to all possible sockets via pfctlinput2.
 		 * Since the upper layer information (such as protocol type,
 		 * source and destination ports) is embedded in the encrypted
 		 * data and might have been cut, we can't directly call
 		 * an upper layer ctlinput function. However, the pcbnotify
 		 * function will consider source and destination addresses
 		 * as well as the flow info value, and may be able to find
 		 * some PCB that should be notified.
 		 * Although pfctlinput2 will call esp6_ctlinput(), there is
 		 * no possibility of an infinite loop of function calls,
 		 * because we don't pass the inner IPv6 header.
 		 */
 		bzero(&ip6cp1, sizeof(ip6cp1));
 		ip6cp1.ip6c_src = ip6cp->ip6c_src;
 		pfctlinput2(cmd, sa, (void *)&ip6cp1);
 
 		/*
 		 * Then go to special cases that need ESP header information.
 		 * XXX: We assume that when ip6 is non NULL,
 		 * M and OFF are valid.
 		 */
 
 		if (cmd == PRC_MSGSIZE) {
 			struct secasvar *sav;
 			u_int32_t spi;
 			int valid;
 
 			/* check header length before using m_copydata */
 			if (m->m_pkthdr.len < off + sizeof (struct esp))
 				return;
 			m_copydata(m, off + offsetof(struct esp, esp_spi),
 				sizeof(u_int32_t), (caddr_t) &spi);
 			/*
 			 * Check to see if we have a valid SA corresponding to
 			 * the address in the ICMP message payload.
 			 */
 			sav = KEY_ALLOCSA((union sockaddr_union *)sa,
 					IPPROTO_ESP, spi);
 			valid = (sav != NULL);
 			/* The SA was only needed for the existence test. */
 			if (sav)
 				KEY_FREESAV(&sav);
 
 			/* XXX Further validation? */
 
 			/*
 			 * Depending on whether the SA is "valid" and
 			 * routing table size (mtudisc_{hi,lo}wat), we will:
 			 * - recalculate the new MTU and create the
 			 *   corresponding routing entry, or
 			 * - ignore the MTU change notification.
 			 */
 			icmp6_mtudisc_update(ip6cp, valid);
 		}
 	} else {
 		/* we normally notify any pcb here */
 	}
 }
 #endif /* INET6 */
Index: projects/pnet/sys/netipx/ipx_input.c
===================================================================
--- projects/pnet/sys/netipx/ipx_input.c	(revision 193105)
+++ projects/pnet/sys/netipx/ipx_input.c	(revision 193106)
@@ -1,506 +1,506 @@
 /*-
  * Copyright (c) 1984, 1985, 1986, 1987, 1993
  *	The Regents of the University of California.
  * Copyright (c) 2004-2005 Robert N. M. Watson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * Copyright (c) 1995, Mike Mitchell
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ipx_input.c
  */
 
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/mbuf.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/kernel.h>
 #include <sys/random.h>
 #include <sys/sysctl.h>
 
 #include <net/if.h>
 #include <net/route.h>
 #include <net/netisr.h>
 
 #include <netipx/ipx.h>
 #include <netipx/spx.h>
 #include <netipx/ipx_if.h>
 #include <netipx/ipx_pcb.h>
 #include <netipx/ipx_var.h>
 
 /*
  * Run-time tunables exported through sysctl.  All are read-write integers
  * that default to 0.
  */
 
 /* When non-zero, ipxintr() verifies the checksum of received packets. */
 int	ipxcksum = 0;
 SYSCTL_INT(_net_ipx_ipx, OID_AUTO, checksum, CTLFLAG_RW,
 	   &ipxcksum, 0, "Compute ipx checksum");
 
 /* When non-zero, ipx_forward() prints a line for each forwarded packet. */
 static int	ipxprintfs = 0;		/* printing forwarding information */
 SYSCTL_INT(_net_ipx_ipx, OID_AUTO, ipxprintfs, CTLFLAG_RW,
 	   &ipxprintfs, 0, "Printing forwarding information");
 
 /* When zero, ipx_forward() drops every packet it is handed. */
 static int	ipxforwarding = 0;
 SYSCTL_INT(_net_ipx_ipx, OID_AUTO, ipxforwarding, CTLFLAG_RW,
 	    &ipxforwarding, 0, "Enable ipx forwarding");
 
 /*
  * When non-zero, ipxintr() passes type 20 (NetBIOS) packets to
  * ipx_output_type20(); otherwise it frees them.
  * NOTE(review): this knob is attached to _net_ipx while the three above
  * use _net_ipx_ipx -- confirm the different parent node is intended.
  */
 static int	ipxnetbios = 0;
 SYSCTL_INT(_net_ipx, OID_AUTO, ipxnetbios, CTLFLAG_RW,
 	   &ipxnetbios, 0, "Propagate netbios over ipx");
 
 static	int ipx_do_route(struct ipx_addr *src, struct route *ro);
 static	void ipx_undo_route(struct route *ro);
 static	void ipx_forward(struct mbuf *m);
 static	void ipxintr(struct mbuf *m);
 
 const union	ipx_net ipx_zeronet;
 const union	ipx_host ipx_zerohost;
 
 const union	ipx_net	ipx_broadnet = { .s_net[0] = 0xffff,
 					    .s_net[1] = 0xffff };
 const union	ipx_host ipx_broadhost = { .s_host[0] = 0xffff,
 					    .s_host[1] = 0xffff,
 					    .s_host[2] = 0xffff };
 
 struct	ipxstat ipxstat;
 struct	sockaddr_ipx ipx_netmask, ipx_hostmask;
 
 /*
  * IPX protocol control block (pcb) lists.
  */
 struct mtx		ipxpcb_list_mtx;
 struct ipxpcbhead	ipxpcb_list;
 struct ipxpcbhead	ipxrawpcb_list;
 
 /*
  * netisr handler description for IPX; registered from ipx_init().
  * Names ipxintr() as the handler for NETISR_IPX and bounds the queue
  * at IFQ_MAXLEN.
  */
 static struct netisr_handler ipx_nh = {
 	.nh_name = "ipx",
 	.nh_handler = ipxintr,
 	.nh_proto = NETISR_IPX,
 	.nh_qlimit = IFQ_MAXLEN,
 	.nh_policy = NETISR_POLICY_SOURCE,
 };
 
 long	ipx_pexseq;		/* Locked with ipxpcb_list_mtx. */
 
 /*
  * IPX initialization.
  *
  * Seeds ipx_pexseq with random data, initializes both pcb lists and the
  * pcb list lock, fills in the global network and host masks, and finally
  * registers the IPX netisr handler.
  */
 
 void
 ipx_init(void)
 {
 
 	read_random(&ipx_pexseq, sizeof ipx_pexseq);
 
 	LIST_INIT(&ipxpcb_list);
 	LIST_INIT(&ipxrawpcb_list);
 
 	IPX_LIST_LOCK_INIT();
 
 	/* Network mask: all-ones network number only. */
 	ipx_netmask.sipx_len = 6;
 	ipx_netmask.sipx_addr.x_net = ipx_broadnet;
 
 	/* Host mask: all-ones network number and host address. */
 	ipx_hostmask.sipx_len = 12;
 	ipx_hostmask.sipx_addr.x_net = ipx_broadnet;
 	ipx_hostmask.sipx_addr.x_host = ipx_broadhost;
 
-	netisr2_register(&ipx_nh);
+	netisr_register(&ipx_nh);
 }
 
 /*
  * IPX input routine.  Pass to next level.
  *
  * Installed as the handler in ipx_nh.  For each received packet it:
  *  - drops it if no IPX address is configured yet,
  *  - makes sure a full struct ipx header is contiguous,
  *  - gives every raw pcb a copy,
  *  - validates/trims the length and optionally checks the checksum,
  *  - handles NetBIOS (type 20) propagation,
  *  - forwards packets that are not for this host, and
  *  - delivers the rest to the matching pcb (SPX or plain IPX input).
  * The mbuf m is not referenced again by this function once it has been
  * freed, forwarded or passed to an input/output routine.
  */
 static void
 ipxintr(struct mbuf *m)
 {
 	struct ipx *ipx;
 	struct ipxpcb *ipxp;
 	struct ipx_ifaddr *ia;
 	int len;
 
 	/*
 	 * If no IPX addresses have been set yet but the interfaces
 	 * are receiving, can't do anything with incoming packets yet.
 	 */
 	if (ipx_ifaddr == NULL) {
 		m_freem(m);
 		return;
 	}
 
 	ipxstat.ipxs_total++;
 
 	/*
 	 * Pull the IPX header into the first mbuf when it lives in external
 	 * storage or the first mbuf is too short.  On failure m is NULL and
 	 * is not freed here (presumably m_pullup() already released the
 	 * chain -- confirm against mbuf(9)).
 	 */
 	if ((m->m_flags & M_EXT || m->m_len < sizeof(struct ipx)) &&
 	    (m = m_pullup(m, sizeof(struct ipx))) == NULL) {
 		ipxstat.ipxs_toosmall++;
 		return;
 	}
 
 	/*
 	 * Give any raw listeners a crack at the packet
 	 */
 	IPX_LIST_LOCK();
 	LIST_FOREACH(ipxp, &ipxrawpcb_list, ipxp_list) {
 		/* Each listener gets its own copy; a failed copy is skipped. */
 		struct mbuf *m1 = m_copy(m, 0, (int)M_COPYALL);
 		if (m1 != NULL) {
 			IPX_LOCK(ipxp);
 			ipx_input(m1, ipxp);
 			IPX_UNLOCK(ipxp);
 		}
 	}
 	IPX_LIST_UNLOCK();
 
 	ipx = mtod(m, struct ipx *);
 	len = ntohs(ipx->ipx_len);
 	/*
 	 * Check that the amount of data in the buffers
 	 * is as at least much as the IPX header would have us expect.
 	 * Trim mbufs if longer than we expect.
 	 * Drop packet if shorter than we expect.
 	 */
 	if (m->m_pkthdr.len < len) {
 		ipxstat.ipxs_tooshort++;
 		m_freem(m);
 		return;
 	}
 	if (m->m_pkthdr.len > len) {
 		if (m->m_len == m->m_pkthdr.len) {
 			/* Single-mbuf packet: just shorten it in place. */
 			m->m_len = len;
 			m->m_pkthdr.len = len;
 		} else
 			/* Negative count: the excess is removed from the tail. */
 			m_adj(m, len - m->m_pkthdr.len);
 	}
 	/* A checksum field of 0xffff is accepted without verification. */
 	if (ipxcksum && ipx->ipx_sum != 0xffff) {
 		if (ipx->ipx_sum != ipx_cksum(m, len)) {
 			ipxstat.ipxs_badsum++;
 			m_freem(m);
 			return;
 		}
 	}
 
 	/*
 	 * Propagated (NetBIOS) packets (type 20) have to be handled
 	 * differently. :-(
 	 */
 	if (ipx->ipx_pt == IPXPROTO_NETBIOS) {
 		if (ipxnetbios) {
 			ipx_output_type20(m);
 			return;
 		} else {
 			m_freem(m);
 			return;
 		}
 	}
 
 	/*
 	 * Is this a directed broadcast?
 	 */
 	if (ipx_hosteqnh(ipx_broadhost,ipx->ipx_dna.x_host)) {
 		if ((!ipx_neteq(ipx->ipx_dna, ipx->ipx_sna)) &&
 		    (!ipx_neteqnn(ipx->ipx_dna.x_net, ipx_broadnet)) &&
 		    (!ipx_neteqnn(ipx->ipx_sna.x_net, ipx_zeronet)) &&
 		    (!ipx_neteqnn(ipx->ipx_dna.x_net, ipx_zeronet)) ) {
 			/*
 			 * If it is a broadcast to the net where it was
 			 * received from, treat it as ours.
 			 */
 			for (ia = ipx_ifaddr; ia != NULL; ia = ia->ia_next)
 				if((ia->ia_ifa.ifa_ifp == m->m_pkthdr.rcvif) &&
 				   ipx_neteq(ia->ia_addr.sipx_addr,
 					     ipx->ipx_dna))
 					goto ours;
 
 			/*
 			 * Look to see if I need to eat this packet.
 			 * Algorithm is to forward all young packets
 			 * and prematurely age any packets which will
 			 * be physically broadcast.
 			 * Any very old packets eaten without forwarding
 			 * would die anyway.
 			 *
 			 * Suggestion of Bill Nesheim, Cornell U.
 			 */
 			if (ipx->ipx_tc < IPX_MAXHOPS) {
 				ipx_forward(m);
 				return;
 			}
 		}
 	/*
 	 * Is this our packet? If not, forward.
 	 */
 	} else {
 		/*
 		 * Ours when the host part matches a local address and the
 		 * network part either matches it too or is the zero net.
 		 */
 		for (ia = ipx_ifaddr; ia != NULL; ia = ia->ia_next)
 			if (ipx_hosteq(ipx->ipx_dna, ia->ia_addr.sipx_addr) &&
 			    (ipx_neteq(ipx->ipx_dna, ia->ia_addr.sipx_addr) ||
 			     ipx_neteqnn(ipx->ipx_dna.x_net, ipx_zeronet)))
 				break;
 
 		if (ia == NULL) {
 			ipx_forward(m);
 			return;
 		}
 	}
 ours:
 	/*
 	 * Locate pcb for datagram.
 	 */
 	IPX_LIST_LOCK();
 	ipxp = ipx_pcblookup(&ipx->ipx_sna, ipx->ipx_dna.x_port, IPX_WILDCARD);
 	/*
 	 * Switch out to protocol's input routine.
 	 */
 	if (ipxp != NULL) {
 		ipxstat.ipxs_delivered++;
 		/*
 		 * Unless the pcb asked for all packets, SPX packets go to
 		 * spx_input(); everything else falls through to ipx_input().
 		 */
 		if ((ipxp->ipxp_flags & IPXP_ALL_PACKETS) == 0)
 			switch (ipx->ipx_pt) {
 			case IPXPROTO_SPX:
 				IPX_LOCK(ipxp);
 				/* Will release both locks. */
 				spx_input(m, ipxp);
 				return;
 			}
 		IPX_LOCK(ipxp);
 		ipx_input(m, ipxp);
 		IPX_UNLOCK(ipxp);
 	} else
 		/* No pcb matched: drop the packet. */
 		m_freem(m);
 	IPX_LIST_UNLOCK();
 }
 
 /*
  * Control-input hook for IPX.  Currently a no-op: cmd, arg_as_sa and
  * dummy are all ignored.
  *
  * XXX arg_as_sa should be swapped with dummy.
  */
 void
 ipx_ctlinput(int cmd, struct sockaddr *arg_as_sa, void *dummy)
 {
 
 	/* Currently, nothing. */
 }
 
 /*
  * Forward a packet.  If an error occurs, drop the packet: IPX has no
  * way to return errors to the sender.
  */
 
 static struct route ipx_droute;
 static struct route ipx_sroute;
 
 static void
 ipx_forward(struct mbuf *m)
 {
 	struct ipx *ipx = mtod(m, struct ipx *);
 	int error;
 	int agedelta = 1;
 	int flags = IPX_FORWARDING;
 	int ok_there = 0;
 	int ok_back = 0;
 
 	if (ipxforwarding == 0) {
 		/* can't tell difference between net and host */
 		ipxstat.ipxs_cantforward++;
 		m_freem(m);
 		goto cleanup;
 	}
 	ipx->ipx_tc++;
 	if (ipx->ipx_tc > IPX_MAXHOPS) {
 		ipxstat.ipxs_cantforward++;
 		m_freem(m);
 		goto cleanup;
 	}
 
 	if ((ok_there = ipx_do_route(&ipx->ipx_dna,&ipx_droute)) == 0) {
 		ipxstat.ipxs_noroute++;
 		m_freem(m);
 		goto cleanup;
 	}
 	/*
 	 * Here we think about  forwarding  broadcast packets,
 	 * so we try to insure that it doesn't go back out
 	 * on the interface it came in on.  Also, if we
 	 * are going to physically broadcast this, let us
 	 * age the packet so we can eat it safely the second time around.
 	 */
 	if (ipx->ipx_dna.x_host.c_host[0] & 0x1) {
 		struct ipx_ifaddr *ia = ipx_iaonnetof(&ipx->ipx_dna);
 		struct ifnet *ifp;
 		if (ia != NULL) {
 			/* I'm gonna hafta eat this packet */
 			agedelta += IPX_MAXHOPS - ipx->ipx_tc;
 			ipx->ipx_tc = IPX_MAXHOPS;
 		}
 		if ((ok_back = ipx_do_route(&ipx->ipx_sna,&ipx_sroute)) == 0) {
 			/* error = ENETUNREACH; He'll never get it! */
 			ipxstat.ipxs_noroute++;
 			m_freem(m);
 			goto cleanup;
 		}
 		if (ipx_droute.ro_rt &&
 		    (ifp = ipx_droute.ro_rt->rt_ifp) &&
 		    ipx_sroute.ro_rt &&
 		    (ifp != ipx_sroute.ro_rt->rt_ifp)) {
 			flags |= IPX_ALLOWBROADCAST;
 		} else {
 			ipxstat.ipxs_noroute++;
 			m_freem(m);
 			goto cleanup;
 		}
 	}
 	/*
 	 * We don't need to recompute checksum because ipx_tc field
 	 * is ignored by checksum calculation routine, however
 	 * it may be desirable to reset checksum if ipxcksum == 0
 	 */
 #if 0
 	if (!ipxcksum)
 		ipx->ipx_sum = 0xffff;
 #endif
 
 	error = ipx_outputfl(m, &ipx_droute, flags);
 	if (error == 0) {
 		ipxstat.ipxs_forward++;
 
 		if (ipxprintfs) {
 			printf("forward: ");
 			ipx_printhost(&ipx->ipx_sna);
 			printf(" to ");
 			ipx_printhost(&ipx->ipx_dna);
 			printf(" hops %d\n", ipx->ipx_tc);
 		}
 	} 
 cleanup:
 	if (ok_there)
 		ipx_undo_route(&ipx_droute);
 	if (ok_back)
 		ipx_undo_route(&ipx_sroute);
 }
 
 /*
  * Fill ro with an AF_IPX destination built from src (port forced to 0)
  * and look up a route for it.
  *
  * Returns 1, after bumping the route's use counter, when a route with an
  * interface was found; returns 0 otherwise.
  * NOTE(review): when a route is found but has no interface, 0 is returned
  * and callers here then skip ipx_undo_route() -- confirm no reference is
  * left behind in that case.
  */
 static int
 ipx_do_route(struct ipx_addr *src, struct route *ro)
 {
 	struct sockaddr_ipx *sipx;
 
 	bzero((caddr_t)ro, sizeof(*ro));
 
 	sipx = (struct sockaddr_ipx *)&ro->ro_dst;
 	sipx->sipx_family = AF_IPX;
 	sipx->sipx_len = sizeof(*sipx);
 	sipx->sipx_addr = *src;
 	sipx->sipx_addr.x_port = 0;
 
 	rtalloc_ign(ro, 0);
 	if (ro->ro_rt != NULL && ro->ro_rt->rt_ifp != NULL) {
 		ro->ro_rt->rt_use++;
 		return (1);
 	}
 	return (0);
 }
 
 /*
  * Release the route held in ro, if any, via RTFREE().
  */
 static void
 ipx_undo_route(struct route *ro)
 {
 
 	if (ro->ro_rt == NULL)
 		return;
 	RTFREE(ro->ro_rt);
 }
 
 /*
  * XXXRW: This code should be run in its own netisr dispatch to avoid a call
  * back into the socket code from the IPX output path.
  */
 /*
  * Hand a copy of outgoing packet m to every raw IPX pcb.
  *
  * Each copy gets a struct ipx prepended whose source network is zero and
  * whose source host is taken from the IPX address configured on ifp (or
  * zero when there is none).  On point-to-point interfaces the whole source
  * address is taken from the interface's first AF_IPX address.  The
  * original m is left untouched; copies that cannot be allocated are
  * silently skipped.
  */
 void
 ipx_watch_output(struct mbuf *m, struct ifnet *ifp)
 {
 	struct ipxpcb *ipxp;
 	struct ifaddr *ifa;
 	struct ipx_ifaddr *ia;
 
 	/*
 	 * Give any raw listeners a crack at the packet
 	 */
 	IPX_LIST_LOCK();
 	LIST_FOREACH(ipxp, &ipxrawpcb_list, ipxp_list) {
 		struct mbuf *m0 = m_copy(m, 0, (int)M_COPYALL);
 		if (m0 != NULL) {
 			struct ipx *ipx;
 
 			/* M_PREPEND leaves m0 NULL when it cannot allocate. */
 			M_PREPEND(m0, sizeof(*ipx), M_DONTWAIT);
 			if (m0 == NULL)
 				continue;
 			ipx = mtod(m0, struct ipx *);
 			ipx->ipx_sna.x_net = ipx_zeronet;
 			for (ia = ipx_ifaddr; ia != NULL; ia = ia->ia_next)
 				if (ifp == ia->ia_ifp)
 					break;
 			if (ia == NULL)
 				ipx->ipx_sna.x_host = ipx_zerohost;
 			else
 				ipx->ipx_sna.x_host =
 				    ia->ia_addr.sipx_addr.x_host;
 
 			if (ifp != NULL && (ifp->if_flags & IFF_POINTOPOINT))
 			    TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 				if (ifa->ifa_addr->sa_family == AF_IPX) {
 				    ipx->ipx_sna = IA_SIPX(ifa)->sipx_addr;
 				    break;
 				}
 			    }
 			/*
 			 * ipx_len is a 16-bit network-order field (ipxintr()
 			 * reads it with ntohs()), so store it with htons().
 			 * A 32-bit swap would leave the wrong half in the
 			 * field on little-endian machines.
 			 */
 			ipx->ipx_len = htons(m0->m_pkthdr.len);
 			IPX_LOCK(ipxp);
 			ipx_input(m0, ipxp);
 			IPX_UNLOCK(ipxp);
 		}
 	}
 	IPX_LIST_UNLOCK();
 }
Index: projects/pnet/sys/netnatm/natm_proto.c
===================================================================
--- projects/pnet/sys/netnatm/natm_proto.c	(revision 193105)
+++ projects/pnet/sys/netnatm/natm_proto.c	(revision 193106)
@@ -1,114 +1,114 @@
 /*-
  * Copyright (c) 1996 Charles D. Cranor and Washington University.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *      This product includes software developed by Charles D. Cranor and
  *      Washington University.
  * 4. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * $NetBSD: natm_proto.c,v 1.3 1996/09/18 00:56:41 chuck Exp $
  */
 
 /*
  * protocol layer for access to native mode ATM
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/socket.h>
 #include <sys/protosw.h>
 #include <sys/domain.h>
 
 #include <net/if.h>
 #include <net/netisr.h>
 
 #include <netinet/in.h>
 
 #include <netnatm/natm.h>
 
 static	void natm_init(void);
 
 static struct domain natmdomain;
 
 static struct protosw natmsw[] = {
 {
 	.pr_type =		SOCK_STREAM,
 	.pr_domain =		&natmdomain,
 	.pr_protocol =		PROTO_NATMAAL5,
 	.pr_flags =		PR_CONNREQUIRED,
 	.pr_usrreqs =		&natm_usrreqs
 },
 {
 	.pr_type =		SOCK_DGRAM,
 	.pr_domain =		&natmdomain,
 	.pr_protocol =		PROTO_NATMAAL5,
 	.pr_flags =		PR_CONNREQUIRED|PR_ATOMIC,
 	.pr_usrreqs =		&natm_usrreqs
 },
 {
 	.pr_type =		SOCK_STREAM,
 	.pr_domain =		&natmdomain,
 	.pr_protocol =		PROTO_NATMAAL0,
 	.pr_flags =		PR_CONNREQUIRED,
 	.pr_usrreqs =		&natm_usrreqs
 },
 };
 
 static struct domain natmdomain = {
 	.dom_family =		AF_NATM,
 	.dom_name =		"natm",
 	.dom_init =		natm_init,
 	.dom_protosw =		natmsw,
 	.dom_protoswNPROTOSW =	&natmsw[sizeof(natmsw)/sizeof(natmsw[0])],
 };
 
 /*
  * netisr handler description for native ATM; registered from natm_init().
  * Names natmintr() as the handler for NETISR_NATM.  The queue limit is a
  * literal 1000; the inline comment records IFQ_MAXLEN as the alternative.
  */
 static struct netisr_handler natm_nh = {
 	.nh_name = "natm",
 	.nh_handler = natmintr,
 	.nh_proto = NETISR_NATM,
 	.nh_qlimit = 1000 /* IFQ_MAXLEN */,
 	.nh_policy = NETISR_POLICY_SOURCE,
 };
 
 #ifdef NATM_STAT
 u_int natm_sodropcnt;		/* # mbufs dropped due to full sb */
 u_int natm_sodropbytes;		/* # of bytes dropped */
 u_int natm_sookcnt;		/* # mbufs ok */
 u_int natm_sookbytes;		/* # of bytes ok */
 #endif
 
 /*
  * Domain init hook (natmdomain.dom_init): set up the natm pcb list and
  * its lock, then register the natm netisr handler.
  */
 static void
 natm_init(void)
 {
 	LIST_INIT(&natm_pcbs);
 	NATM_LOCK_INIT();
-	netisr2_register(&natm_nh);
+	netisr_register(&natm_nh);
 }
 
 DOMAIN_SET(natm);
Index: projects/pnet/sys/sys/pcpu.h
===================================================================
--- projects/pnet/sys/sys/pcpu.h	(revision 193105)
+++ projects/pnet/sys/sys/pcpu.h	(revision 193106)
@@ -1,143 +1,143 @@
 /*-
  * Copyright (c) 2001 Wind River Systems, Inc.
  * All rights reserved.
  * Written by: John Baldwin <jhb@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the author nor the names of any co-contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _SYS_PCPU_H_
 #define	_SYS_PCPU_H_
 
 #ifdef LOCORE
 #error "no assembler-serviceable parts inside"
 #endif
 
 #include <sys/queue.h>
 #include <sys/vmmeter.h>
 #include <sys/resource.h>
 #include <machine/pcpu.h>
 
 struct pcb;
 struct thread;
 
 /*
  * Doubly-linked queue node embedded in struct pcpu for the read-mostly
  * lock code.
  *
  * XXXUPS remove as soon as we have per cpu variable
  * linker sets and can define rm_queue in _rm_lock.h
  */
 struct rm_queue {
 	struct rm_queue	*volatile rmq_next;
 	struct rm_queue	*volatile rmq_prev;
 };
 
 /*
  * Size of the per-CPU name buffer (pc_name): the "CPU " prefix plus the
  * decimal digits of MAXCPU.  sizeof must be applied to the stringified
  * MAXCPU itself; sizeof(string + 1) would measure a pointer, not the
  * string.
  */
 #define	PCPU_NAME_LEN (sizeof("CPU ") + sizeof(__XSTRING(MAXCPU)) + 1)
 
 
 /*
  * This structure maps out the global data that needs to be kept on a
  * per-cpu basis.  The members are accessed via the PCPU_GET/SET/PTR
  * macros defined in <machine/pcpu.h>.  Machine dependent fields are
  * defined in the PCPU_MD_FIELDS macro defined in <machine/pcpu.h>.
  */
 /*
  * Per-CPU data.  Machine-independent fields come first; the
  * machine-dependent PCPU_MD_FIELDS are deliberately kept last (see the
  * comment above them).
  */
 struct pcpu {
 	struct thread	*pc_curthread;		/* Current thread */
 	struct thread	*pc_idlethread;		/* Idle thread */
 	struct thread	*pc_fpcurthread;	/* Fp state owner */
 	struct thread	*pc_deadthread;		/* Zombie thread or NULL */
 	struct pcb	*pc_curpcb;		/* Current pcb */
 	uint64_t	pc_switchtime;		/* presumably time of last switch -- confirm */
 	int		pc_switchticks;		/* presumably ticks at last switch -- confirm */
 	u_int		pc_cpuid;		/* This cpu number */
 	cpumask_t	pc_cpumask;		/* This cpu mask */
 	cpumask_t	pc_other_cpus;		/* Mask of all other cpus */
 	SLIST_ENTRY(pcpu) pc_allcpu;		/* SLIST linkage (cf. struct cpuhead) */
 	struct lock_list_entry *pc_spinlocks;
 #ifdef KTR_PERCPU
 	int		pc_ktr_idx;		/* Index into trace table */
 	char		*pc_ktr_buf;
 #endif
 #ifdef KTR
 	char		pc_name[PCPU_NAME_LEN];	/* String name for KTR. */
 #endif
 	struct vmmeter	pc_cnt;			/* VM stats counters */
 	long		pc_cp_time[CPUSTATES];	/* statclock ticks */
 	struct device	*pc_device;
-	void		*pc_netisr2;		/* netisr2 SWI cookie. */
+	void		*pc_netisr;		/* netisr SWI cookie. */
 
 	/* 
 	 * Stuff for read mostly lock
 	 * 
 	 * XXXUPS remove as soon as we have per cpu variable
 	 * linker sets.
 	 */
 	struct rm_queue  pc_rm_queue; 
 
 	/*
 	 * Keep MD fields last, so that CPU-specific variations on a
 	 * single architecture don't result in offset variations of
 	 * the machine-independent fields of the pcpu. Even though
 	 * the pcpu structure is private to the kernel, some ports
 	 * (e.g. lsof, part of gtop) define _KERNEL and include this
 	 * header. While strictly speaking this is wrong, there's no
 	 * reason not to keep the offsets of the MI fields constant.
 	 * If only to make kernel debugging easier...
 	 */
 	PCPU_MD_FIELDS;
 };
 
 #ifdef _KERNEL
 
 SLIST_HEAD(cpuhead, pcpu);
 
 extern struct cpuhead cpuhead;
 
 #define	curcpu		PCPU_GET(cpuid)
 #define	curproc		(curthread->td_proc)
 #ifndef curthread
 #define	curthread	PCPU_GET(curthread)
 #endif
 
 /*
  * Machine dependent callouts.  cpu_pcpu_init() is responsible for
  * initializing machine dependent fields of struct pcpu, and
  * db_show_mdpcpu() is responsible for handling machine dependent
  * fields for the DDB 'show pcpu' command.
  */
 
 extern struct pcpu *cpuid_to_pcpu[MAXCPU];
 
 
 void	cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size);
 void	db_show_mdpcpu(struct pcpu *pcpu);
 
 void	pcpu_destroy(struct pcpu *pcpu);
 struct	pcpu *pcpu_find(u_int cpuid);
 void	pcpu_init(struct pcpu *pcpu, int cpuid, size_t size);
 
 #endif	/* _KERNEL */
 
 #endif /* !_SYS_PCPU_H_ */